The goal of the assignment is to explore the NOAA Storm Database and examine the effects of severe weather events on both population and economy. The database covers the time period between 1950 and November 2011.
The following analysis investigates which types of severe weather events are most harmful to population health and which have the greatest economic consequences.
Information on the Data: Documentation
# Sets a hard-coded, machine-specific working directory; any relative path
# used later in this script is resolved against it.
# NOTE(review): this only works where this exact folder exists -- consider
# project-relative paths instead.
setwd("~/Course 2 Assignment 5(2.)")
library("knitr")
library("plyr")
## Warning: package 'plyr' was built under R version 4.1.3
library("dplyr")
## Warning: package 'dplyr' was built under R version 4.1.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.1.3
library("lattice")
library("datasets")
library("data.table")
## Warning: package 'data.table' was built under R version 4.1.3
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library("grid")
library("gridExtra")
## Warning: package 'gridExtra' was built under R version 4.1.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# URL of the NOAA storm data archive (bzip2-compressed CSV).
link <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# One definition of the local archive path, shared by the download and the
# read, so the two can never point at different files.
stormFile <- path.expand(
  file.path("~/Course 2 Assignment 5(2.)", "repdata_data_StormData.csv.bz2")
)

# Fetch the archive only when no local copy exists, instead of on every run.
# The file is binary, so request binary mode explicitly.
if (!file.exists(stormFile)) {
  download.file(link, destfile = stormFile, mode = "wb")
}

# Read the compressed CSV and report how many distinct event types it holds.
a <- fread(stormFile)
length(unique(a$EVTYPE))
## [1] 985
# Working data.table built from the raw import, followed by its column names.
aDT <- as.data.table(a)
names(aDT)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Table size as (rows, columns).
c(nrow(aDT), ncol(aDT))
## [1] 902297 37
# Columns needed for the health and economic analysis; listed once and reused
# for both the column drop and the final selection.
keepCols <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")

# Work out the unwanted columns from the names alone (no copy of the table is
# made just to read its column names), then drop them by reference.
aDTRemove <- colnames(aDT)[!colnames(aDT) %in% keepCols]
if (length(aDTRemove) > 0) {
  aDT[, (aDTRemove) := NULL]
}

# Keep only rows with a known event type and at least one injury, fatality,
# or non-zero property/crop damage figure.
aDT <- aDT[
  EVTYPE != "?" & (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  keepCols,
  with = FALSE
]
# Upper-case the exponent codes so that e.g. "k" and "K" share one multiplier.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
aDT[, (cols) := lapply(.SD, toupper), .SDcols = cols]

# Exponent code -> multiplier lookup tables.
# An empty code cannot be used as a lookup key (character indexing never
# matches ""), so blank codes -- like any code missing from a table -- come
# back as NA from the lookup and are given a multiplier of 1 further down.
# NOTE(review): cropDmgKey has no entries for "H" or the digits "1"-"9" that
# propDmgKey maps, so those codes get a multiplier of 1 for crop damage --
# confirm this asymmetry is intended.
propDmgKey <- c(
  "-" = 10^0, "+" = 10^0,
  "0" = 10^0, "1" = 10^1, "2" = 10^2, "3" = 10^3, "4" = 10^4,
  "5" = 10^5, "6" = 10^6, "7" = 10^7, "8" = 10^8, "9" = 10^9,
  "H" = 10^2, "K" = 10^3, "M" = 10^6, "B" = 10^9
)
cropDmgKey <- c("?" = 10^0, "0" = 10^0, "K" = 10^3, "M" = 10^6, "B" = 10^9)

# Replace each code with its numeric multiplier. unname() drops the lookup
# names so the columns end up as plain numeric vectors.
aDT[, PROPDMGEXP := unname(propDmgKey[as.character(PROPDMGEXP)])]
aDT[is.na(PROPDMGEXP), PROPDMGEXP := 10^0]
aDT[, CROPDMGEXP := unname(cropDmgKey[as.character(CROPDMGEXP)])]
aDT[is.na(CROPDMGEXP), CROPDMGEXP := 10^0]

# Add the per-row costs: reported damage figure times its multiplier.
aDT <- aDT[, .(
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP, propCost = PROPDMG * PROPDMGEXP,
  CROPDMG, CROPDMGEXP, cropCost = CROPDMG * CROPDMGEXP
)]
# Sum property and crop cost per event type, rank by combined cost
# (largest first) and keep the first ten rows.
totalCostDT <- aDT[
  ,
  .(
    propCost = sum(propCost),
    cropCost = sum(cropCost),
    Total_Cost = sum(propCost) + sum(cropCost)
  ),
  by = "EVTYPE"
][order(-Total_Cost)][1:10]
head(totalCostDT, n = 5)
## EVTYPE propCost cropCost Total_Cost
## 1: FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3: TORNADO 56947380677 414953270 57362333947
## 4: STORM SURGE 43323536000 5000 43323541000
## 5: HAIL 15735267513 3025954473 18761221986
# Sum fatalities and injuries per event type, rank by fatalities
# (largest first) and keep the first ten rows.
totalInjuriesDT <- aDT[
  ,
  .(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES),
    totals = sum(FATALITIES) + sum(INJURIES)
  ),
  by = "EVTYPE"
][order(-FATALITIES)][1:10]
head(totalInjuriesDT, n = 5)
## EVTYPE FATALITIES INJURIES totals
## 1: TORNADO 5633 91346 96979
## 2: EXCESSIVE HEAT 1903 6525 8428
## 3: FLASH FLOOD 978 1777 2755
## 4: HEAT 937 2100 3037
## 5: LIGHTNING 816 5230 6046
##5:Plot
# Reshape the casualty summary to long form: one row per event type and
# measure, with the measure name stored in `bad_thing`.
bad_stuff <- melt(
  totalInjuriesDT,
  id.vars = "EVTYPE",
  variable.name = "bad_thing"
)
head(bad_stuff, n = 5)
## EVTYPE bad_thing value
## 1: TORNADO FATALITIES 5633
## 2: EXCESSIVE HEAT FATALITIES 1903
## 3: FLASH FLOOD FATALITIES 978
## 4: HEAT FATALITIES 937
## 5: LIGHTNING FATALITIES 816
###Most Harmful Events to Population Health
# Dodged bar chart of the casualty measures per event type, with event types
# ordered along the x axis by descending value.
healthChart <- ggplot(bad_stuff, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(aes(fill = bad_thing), stat = "identity", position = "dodge") +
  labs(x = "Event Type", y = "Frequency Count") +
  # Slant the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Top 10 US Killers") +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))
healthChart
###Events that have the Greatest Economic Consequences
# Reshape the cost summary to long form: one row per event type and damage
# measure, with the measure name stored in `Damage_Type`.
econ <- melt(
  totalCostDT,
  id.vars = "EVTYPE",
  variable.name = "Damage_Type"
)

# Dodged bar chart of the cost measures per event type, with event types
# ordered along the x axis by descending value.
econChart <- ggplot(econ, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(aes(fill = Damage_Type), stat = "identity", position = "dodge") +
  labs(x = "Event Type", y = "Cost (dollars)") +
  # Slant the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Top 10 US Storm Events causing Economic Consequences") +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))
econChart