The goal of the assignment is to explore the NOAA Storm Database and examine the effects of severe weather events on both population and economy. The database covers the time period between 1950 and November 2011.
The following analysis investigates which types of severe weather events are most harmful to population health and which have the greatest economic consequences.
library("data.table")
library("ggplot2")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("reshape2")
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
##
## dcast, melt
# Fetch the compressed storm data (once) and load it into `data`.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Single source of truth for the local path, so the download target and the
# file that is read back can never drift apart.
destFile <- file.path(getwd(), "repdata%2Fdata%2FStormData.csv.bz2")
# Skip the transfer when the archive is already on disk; re-running the
# analysis should not re-download the file every time.
if (!file.exists(destFile)) {
  # No `method = "curl"`: the default method does not depend on an external
  # curl binary being installed. mode = "wb" keeps the archive byte-exact.
  download.file(fileUrl, destfile = destFile, mode = "wb")
}
# The .bz2 archive is read directly, exactly as before.
data <- read.csv(destFile)
names(data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Keep only the damage, casualty and event-type columns, then sort the rows
# by event type.
keepCols <- c(
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP",
  "FATALITIES", "INJURIES",
  "EVTYPE"
)
data1 <- arrange(data[, keepCols, drop = FALSE], EVTYPE)
head(data1, 5)
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP FATALITIES INJURIES
## 1 200 K 0 0 0
## 2 0 0 0 0
## 3 50 K 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## EVTYPE
## 1 HIGH SURF ADVISORY
## 2 COASTAL FLOOD
## 3 FLASH FLOOD
## 4 LIGHTNING
## 5 TSTM WIND
# Drop the placeholder event type "?" and every record that reports no
# injuries, no fatalities and no property or crop damage.
hasImpact <- with(
  data1,
  INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0
)
keepRow <- data1$EVTYPE != "?" & hasImpact
# Rows whose condition evaluates to NA are excluded, as subset() does.
data1 <- data1[keepRow & !is.na(keepRow), , drop = FALSE]
head(data1, 5)
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP FATALITIES INJURIES
## 1 200 K 0 0 0
## 3 50 K 0 0 0
## 7 100 K 0 0 0
## 8 8 M 0 0 0
## 9 8 K 0 0 0
## EVTYPE
## 1 HIGH SURF ADVISORY
## 3 FLASH FLOOD
## 7 TSTM WIND
## 8 TSTM WIND
## 9 TSTM WIND (G45)
# Convert the alphabetic damage-magnitude codes in PROPDMGEXP / CROPDMGEXP
# into numeric multipliers.
#
# Fix: the keys previously had no "B" entry, so records expressed in billions
# fell through to the default multiplier of 1 and were understated by a
# factor of 10^9. NOAA's Storm Data documentation lists "K" (thousands),
# "M" (millions) and "B" (billions) as the magnitude codes.
# NOTE(review): any printed totals further down that were generated with the
# old mapping need to be regenerated after this change.
data1 <- data.table(data1)
cols <- c("PROPDMGEXP", "CROPDMGEXP")
# Upper-case the codes so e.g. "k" and "K" are treated alike.
data1[, (cols) := lapply(.SD, toupper), .SDcols = cols]

# Map property damage alphanumeric exponents to numeric values.
# (The former "\"\"" entry was a literal two-quote string that could never
# match an empty cell; empty codes are handled by the default below.)
propDmgKey <- c("K" = 10^3,
                "M" = 10^6,
                "B" = 10^9)
# Map crop damage alphanumeric exponents to numeric values
cropDmgKey <- c("?" = 10^0,
                "0" = 10^0,
                "K" = 10^3,
                "M" = 10^6,
                "B" = 10^9)

# Look up each code in `key`; any code without an entry (including the empty
# string and NA) gets a multiplier of 1, which is what the original NA
# back-fill did.
exp_to_multiplier <- function(code, key) {
  mult <- unname(key[match(as.character(code), names(key))])
  mult[is.na(mult)] <- 10^0
  mult
}

data1[, PROPDMGEXP := exp_to_multiplier(PROPDMGEXP, propDmgKey)]
data1[, CROPDMGEXP := exp_to_multiplier(CROPDMGEXP, cropDmgKey)]
head(data1, n = 10)
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP FATALITIES INJURIES
## 1: 200 1e+03 0.00 1e+00 0 0
## 2: 50 1e+03 0.00 1e+00 0 0
## 3: 100 1e+03 0.00 1e+00 0 0
## 4: 8 1e+06 0.00 1e+00 0 0
## 5: 8 1e+03 0.00 1e+00 0 0
## 6: 0 1e+00 1.48 1e+06 0 0
## 7: 0 1e+00 17.96 1e+06 0 0
## 8: 0 1e+00 9.38 1e+06 0 0
## 9: 5 1e+03 0.00 1e+00 0 0
## 10: 1 1e+06 0.00 1e+00 0 0
## EVTYPE
## 1: HIGH SURF ADVISORY
## 2: FLASH FLOOD
## 3: TSTM WIND
## 4: TSTM WIND
## 5: TSTM WIND (G45)
## 6: AGRICULTURAL FREEZE
## 7: AGRICULTURAL FREEZE
## 8: AGRICULTURAL FREEZE
## 9: APACHE COUNTY
## 10: ASTRONOMICAL HIGH TIDE
# Cost per record = damage figure times its multiplier, for property and
# crops separately.
data1 <- data.table(data1)
data1[, `:=`(propCost = PROPDMG * PROPDMGEXP,
             cropCost = CROPDMG * CROPDMGEXP)]

# Sum the costs per event type, rank by combined cost (largest first) and
# keep the first ten rows.
total_c <- data1[
  ,
  .(propCost = sum(propCost),
    cropCost = sum(cropCost),
    Total_Cost = sum(propCost) + sum(cropCost)),
  by = "EVTYPE"
][order(-Total_Cost)][1:10]
head(total_c, 5)
## EVTYPE propCost cropCost Total_Cost
## 1: TORNADO 51637160784 414953270 52052114054
## 2: FLOOD 22157709930 5661968450 27819678380
## 3: HAIL 13932267050 3025954473 16958221523
## 4: FLASH FLOOD 15140812068 1421317100 16562129168
## 5: DROUGHT 1046106000 12472566002 13518672002
# Sum fatalities and injuries per event type, rank by fatalities (largest
# first) and keep the first ten rows.
data1 <- data.table(data1)
total_i <- data1[
  ,
  .(FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES),
    totals = sum(FATALITIES) + sum(INJURIES)),
  by = "EVTYPE"
][order(-FATALITIES)][1:10]
head(total_i, 5)
## EVTYPE FATALITIES INJURIES totals
## 1: TORNADO 5633 91346 96979
## 2: EXCESSIVE HEAT 1903 6525 8428
## 3: FLASH FLOOD 978 1777 2755
## 4: HEAT 937 2100 3037
## 5: LIGHTNING 816 5230 6046
# Reshape the casualty summary to long form: one row per event type and
# measure, with the measure name in THINGS and its figure in VALUE.
pop_health <- melt(
  total_i,
  id.vars = "EVTYPE",
  variable.name = "THINGS",
  value.name = "VALUE"
)
head(pop_health, 5)
## EVTYPE THINGS VALUE
## 1: TORNADO FATALITIES 5633
## 2: EXCESSIVE HEAT FATALITIES 1903
## 3: FLASH FLOOD FATALITIES 978
## 4: HEAT FATALITIES 937
## 5: LIGHTNING FATALITIES 816
# Reshape the cost summary to long form: one row per event type and cost
# measure, with the measure name in THINGS and its figure in VALUE.
econ_con <- melt(
  total_c,
  id.vars = "EVTYPE",
  variable.name = "THINGS",
  value.name = "VALUE"
)
head(econ_con, 5)
## EVTYPE THINGS VALUE
## 1: TORNADO propCost 51637160784
## 2: FLOOD propCost 22157709930
## 3: HAIL propCost 13932267050
## 4: FLASH FLOOD propCost 15140812068
## 5: DROUGHT propCost 1046106000
# Dodged bar chart of the casualty measures per event type, with event types
# ordered along the x axis by decreasing VALUE.
health <- ggplot(pop_health, aes(x = reorder(EVTYPE, -VALUE), y = VALUE)) +
  geom_bar(stat = "identity", aes(fill = THINGS), position = "dodge") +
  labs(
    x = "Event Type",
    y = "Frequency Count",
    title = "Top 10 US Killers"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),  # slanted tick labels
    plot.title = element_text(hjust = 0.5)              # centred title
  )
health
# Dodged bar chart of the cost measures per event type, with event types
# ordered along the x axis by decreasing VALUE.
econ <- ggplot(econ_con, aes(x = reorder(EVTYPE, -VALUE), y = VALUE)) +
  geom_bar(stat = "identity", aes(fill = THINGS), position = "dodge") +
  labs(
    x = "Event Type",
    y = "Cost (dollars)",
    title = "Top 10 US Storm Events causing economic Consequences"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),  # slanted tick labels
    plot.title = element_text(hjust = 0.5)              # centred title
  )
econ