In this report, we aim to analyze the impact of different weather events on public health and economy based on the storm database collected from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) from 1950 - 2011. We will use the estimates of fatalities, injuries, property and crop damage to decide which types of event are most harmful to the population health and economy. From these data, we found that excessive heat and tornado are most harmful with respect to population health, while flood, drought, and hurricane/typhoon have the greatest economic consequences.
echo = TRUE # Always make code visible
options(scipen = 1) # Turn off scientific notations for numbers
library(R.utils)
## Warning: package 'R.utils' was built under R version 3.4.4
## Loading required package: R.oo
## Warning: package 'R.oo' was built under R version 3.4.4
## Loading required package: R.methodsS3
## Warning: package 'R.methodsS3' was built under R version 3.4.4
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.22.0 (2018-04-21) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
## R.utils v2.7.0 successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
library(plyr)
## Warning: package 'plyr' was built under R version 3.4.4
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Dowloading data if it's not already done
if(!file.exists("stormData.csv.bz2")) {
download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
destfile = "stormData.csv.bz2", method = "curl")
}
# Loading data unzip(zipfile = temp, exdir = temp2)
storeData <- bzfile("stormData.csv.bz2")
stormData <- read.csv(storeData)
# Subset (NOAA) storm database
stormData <- stormData[,c('EVTYPE','FATALITIES','INJURIES', 'PROPDMG', 'PROPDMGEXP', 'CROPDMG', 'CROPDMGEXP')]
Stucture of stormData
str(stormData)
## 'data.frame': 902297 obs. of 7 variables:
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
let’s see level from PROPDMGEXP and CROPDMGEXP and refactor them
table(is.na(stormData$PROPDMGEXP))
##
## FALSE
## 902297
stormData$PROPDMGEXP1 <- stormData$PROPDMGEXP
stormData$CROPDMGEXP1 <- stormData$CROPDMGEXP
Let’s see the different levels
levels(stormData$PROPDMGEXP1)
## [1] "" "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"
Refactor PROPDMGEXP
stormData$PROPDMGEXP1 <- mapvalues(stormData$PROPDMGEXP1, from = c("K", "M","", "B", "m", "+", "0", "5", "6", "?", "4", "2", "3", "h", "7", "H", "-", "1", "8"), to = c(10^3, 10^6, 1, 10^9, 10^6, 0,1,10^5, 10^6, 0, 10^4, 10^2, 10^3, 10^2, 10^7, 10^2, 0, 10, 10^8))
New levels
levels(stormData$PROPDMGEXP1)
## [1] "1" "0" "10" "100" "1000" "10000" "100000"
## [8] "1e+06" "1e+07" "1e+08" "1e+09"
stormData$PROPDMGEXP1 <- as.numeric(as.character(stormData$PROPDMGEXP1))
Refactor CROPDMGEXP
stormData$CROPDMGEXP1 <- mapvalues(stormData$CROPDMGEXP1, from = c("","M", "K", "m", "B", "?", "0", "k","2"), to = c(1,10^6, 10^3, 10^6, 10^9, 0, 1, 10^3, 10^2))
stormData$CROPDMGEXP1 <- as.numeric(as.character(stormData$CROPDMGEXP1))
stormData <- mutate(stormData, damage = CROPDMGEXP1*CROPDMG+PROPDMGEXP1*PROPDMG )
## Warning: package 'bindrcpp' was built under R version 3.4.4
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = stormData, FUN="sum")
fatalities <- arrange(fatalities, desc(FATALITIES))[1:10,]
View of the top ten harmful weather event according to fatalities
fatalities
## EVTYPE FATALITIES
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
fatalities$EVTYPE <- factor(fatalities$EVTYPE, levels = fatalities$EVTYPE)
g <- ggplot(fatalities, aes(x = EVTYPE, y = FATALITIES))
g+ geom_bar(stat = "identity", color = "red", fill = "steelblue") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
xlab("Event Type") + ylab("Fatalities") + ggtitle("Number of fatalities by top 10 Weather Events")
injuries <- aggregate(INJURIES ~ EVTYPE, data = stormData, FUN="sum")
injuries <- arrange(injuries, desc(INJURIES))[1:10,]
View of the top ten harmful weather event according to injuries
injuries
## EVTYPE INJURIES
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
## 6 HEAT 2100
## 7 ICE STORM 1975
## 8 FLASH FLOOD 1777
## 9 THUNDERSTORM WIND 1488
## 10 HAIL 1361
injuries$EVTYPE <- factor(injuries$EVTYPE, levels = injuries$EVTYPE)
g <- ggplot(injuries, aes(x = EVTYPE, y = INJURIES))
g+ geom_bar(stat = "identity", color = "red", fill = "green") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
xlab("Event Type") + ylab("Fatalities") + ggtitle("Number of injuries by top 10 Weather Events")
Cost_damages <- aggregate(damage ~ EVTYPE, data=stormData, sum)
Cost_damages <- arrange(Cost_damages, desc(damage))[1:10,]
Cost_damages
## EVTYPE damage
## 1 FLOOD 150319678257
## 2 HURRICANE/TYPHOON 71913712800
## 3 TORNADO 57362333887
## 4 STORM SURGE 43323541000
## 5 HAIL 18761221986
## 6 FLASH FLOOD 18243991079
## 7 DROUGHT 15018672000
## 8 HURRICANE 14610229010
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041360
Cost_damages$EVTYPE <- factor(Cost_damages$EVTYPE, levels = Cost_damages$EVTYPE)
g <- ggplot(Cost_damages, aes(x = EVTYPE, y = damage))
g+geom_bar(stat = "identity", fill = "red", color = "blue") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
xlab("Event Type") + ylab("Damages ($)") + ggtitle("Property & Crop Damages by top 10 Weather Events")