1: Synopsis

The goal of this assignment is to explore the NOAA Storm Database and assess the effects of severe weather events on both the population and the economy. The database covers the period from 1950 to November 2011.

The following analysis investigates which types of severe weather events are most harmful to:

  1. Health (injuries and fatalities)
  2. Property and crops (economic consequences)

Information on the Data: Documentation

2: Data Processing

# Folder that holds the raw data and the full path of the compressed CSV.
# Every path below is built from `dataDir`, so the script no longer changes
# the working directory with setwd() (which stops with an error when the
# folder does not exist) and the download target and the file that is read
# are guaranteed to be the same file.
dataDir <- path.expand("~/Course 2 Assignment 5(2.)")
dataFile <- file.path(dataDir, "repdata_data_StormData.csv.bz2")
library("knitr")
library("plyr")
## Warning: package 'plyr' was built under R version 4.1.3
library("dplyr")
## Warning: package 'dplyr' was built under R version 4.1.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.1.3
library("lattice")
library("datasets")
library("data.table")
## Warning: package 'data.table' was built under R version 4.1.3
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library("grid")
library("gridExtra")
## Warning: package 'gridExtra' was built under R version 4.1.3
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
link <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Create the data folder on first use so the download has somewhere to go.
if (!dir.exists(dataDir)) {
  dir.create(dataDir, recursive = TRUE)
}
# Download only when the file is not already present; "wb" keeps the
# compressed archive byte-exact on every platform.
if (!file.exists(dataFile)) {
  download.file(link, destfile = dataFile, mode = "wb")
}
# Passing the path through `file =` makes fread treat it strictly as a file
# name, even though the path contains spaces.
a <- fread(file = dataFile)
# How many distinct event type labels does the raw import contain?
length(unique(a[["EVTYPE"]]))
## [1] 985
# Coerce the import to a data.table; all later steps operate on `aDT`.
aDT <- as.data.table(a)
# Column names and dimensions of the table, for reference.
names(aDT)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
dim(aDT)
## [1] 902297     37

3: Data Subsetting

# Columns the analysis needs; everything else is dropped.
keepCols <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
# Names of the discarded columns (kept as a variable for reference).
aDTRemove <- setdiff(colnames(aDT), keepCols)

# Keep only events with a known type that caused at least one injury,
# fatality, or some property/crop damage. Selecting `keepCols` in the same
# step replaces the earlier "delete columns, then reselect them" double pass.
aDT <- aDT[
  EVTYPE != "?" & (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  keepCols,
  with = FALSE
]

# Normalise the exponent codes to upper case so "k" and "K" match one key.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
aDT[, (cols) := lapply(.SD, toupper), .SDcols = cols]

# One multiplier table shared by property and crop damage, so the same code
# always maps to the same multiplier in both columns. The former "\"\""
# entries are gone: that key is the two-character string of two quote marks
# and can never match an empty code; empty codes are covered by the fallback
# in expToMultiplier() instead.
propDmgKey <- c(
  "-" = 10^0, "+" = 10^0, "?" = 10^0,
  "0" = 10^0, "1" = 10^1, "2" = 10^2, "3" = 10^3, "4" = 10^4,
  "5" = 10^5, "6" = 10^6, "7" = 10^7, "8" = 10^8, "9" = 10^9,
  "H" = 10^2, "K" = 10^3, "M" = 10^6, "B" = 10^9
)
cropDmgKey <- propDmgKey

# Translate a vector of exponent codes into numeric multipliers.
# Codes missing from `key` (including the empty string) get a multiplier
# of 1, i.e. the damage figure is taken at face value.
expToMultiplier <- function(code, key) {
  mult <- unname(key[match(as.character(code), names(key))])
  mult[is.na(mult)] <- 10^0
  mult
}

aDT[, PROPDMGEXP := expToMultiplier(PROPDMGEXP, propDmgKey)]
aDT[, CROPDMGEXP := expToMultiplier(CROPDMGEXP, cropDmgKey)]

# Dollar cost = reported figure x multiplier, for property and crops alike.
aDT <- aDT[, .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, propCost = PROPDMG * PROPDMGEXP, CROPDMG, CROPDMGEXP, cropCost = CROPDMG * CROPDMGEXP)]

4: Calculating Total Property and Crop Cost & Fatalities and Injuries

# Property, crop and combined cost per event type, most costly first.
totalCostDT <- aDT[, .(propCost = sum(propCost), cropCost = sum(cropCost), Total_Cost = sum(propCost) + sum(cropCost)), by = .(EVTYPE)]
totalCostDT <- totalCostDT[order(-Total_Cost), ]
# head() returns at most 10 rows; indexing with 1:10 would pad the table
# with all-NA rows whenever fewer than 10 event types are present.
totalCostDT <- head(totalCostDT, 10)
head(totalCostDT, 5)
##               EVTYPE     propCost   cropCost   Total_Cost
## 1:             FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON  69305840000 2607872800  71913712800
## 3:           TORNADO  56947380677  414953270  57362333947
## 4:       STORM SURGE  43323536000       5000  43323541000
## 5:              HAIL  15735267513 3025954473  18761221986
# Fatalities, injuries and their sum per event type. The ranking is by
# fatalities, not by the combined total.
totalInjuriesDT <- aDT[, .(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES), totals = sum(FATALITIES) + sum(INJURIES)), by = .(EVTYPE)]
totalInjuriesDT <- totalInjuriesDT[order(-FATALITIES), ]
# Same top-10 cut as above, again without NA padding.
totalInjuriesDT <- head(totalInjuriesDT, 10)
head(totalInjuriesDT, 5)
##            EVTYPE FATALITIES INJURIES totals
## 1:        TORNADO       5633    91346  96979
## 2: EXCESSIVE HEAT       1903     6525   8428
## 3:    FLASH FLOOD        978     1777   2755
## 4:           HEAT        937     2100   3037
## 5:      LIGHTNING        816     5230   6046

5: Plot

# Reshape to long format: one row per event type and measure
# (FATALITIES, INJURIES, totals), with the measure name in `bad_thing`.
bad_stuff <- melt(totalInjuriesDT, id.vars = "EVTYPE", variable.name = "bad_thing")
head(bad_stuff, n = 5)
##            EVTYPE  bad_thing value
## 1:        TORNADO FATALITIES  5633
## 2: EXCESSIVE HEAT FATALITIES  1903
## 3:    FLASH FLOOD FATALITIES   978
## 4:           HEAT FATALITIES   937
## 5:      LIGHTNING FATALITIES   816
###Most Harmful Events to Population Health
# Dodged bars per event type, one bar per measure, event types ordered by
# descending value; x labels are tilted and the title is centred.
healthChart <- ggplot(bad_stuff, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(aes(fill = bad_thing), stat = "identity", position = "dodge") +
  labs(
    x = "Event Type",
    y = "Frequency Count",
    title = "Top 10 US Killers"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
healthChart

###Events that have the Greatest Economic Consequences
# Reshape to long format: one row per event type and cost measure
# (propCost, cropCost, Total_Cost), with the measure name in `Damage_Type`.
econ <- melt(totalCostDT, id.vars = "EVTYPE", variable.name = "Damage_Type")
# Dodged bars per event type, one bar per cost measure, event types ordered
# by descending value; x labels are tilted and the title is centred.
econChart <- ggplot(econ, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(aes(fill = Damage_Type), stat = "identity", position = "dodge") +
  labs(
    x = "Event Type",
    y = "Cost (dollars)",
    title = "Top 10 US Storm Events causing Economic Consequences"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
econChart