Storm events are not rare in the US. They occur frequently across the states, causing damage to both property and people. We analyzed the NOAA storm data recorded between 1950 and 2011. The damage to population health was measured as a weighted sum of fatalities and injuries, while the economic consequences were estimated from crop and property damage. In general, tornadoes seem to be the most damaging natural event.
# Fetch the compressed NOAA storm database only when no local copy exists,
# so re-running the report does not download the archive again.
if (!file.exists("Storm_data.csv.bz2")) {
  url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # Request a binary transfer explicitly: a text-mode download would
  # corrupt the bz2 archive on Windows.
  download.file(url, "Storm_data.csv.bz2", mode = "wb")
}
# Read the archive straight from the .bz2 file and preview the first rows.
data <- read.csv("Storm_data.csv.bz2")
head(data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Convert the raw data frame to a tibble. as_tibble() replaces tbl_df(),
# which is deprecated in current dplyr releases.
data_tbl <- as_tibble(data)
# Health analysis: event type plus the two casualty counts.
data_tbl_health <- select(data_tbl, EVTYPE, FATALITIES, INJURIES)
# Economic analysis: event type plus damage amounts and their exponent codes.
data_tbl_economic <- select(
  data_tbl,
  EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)
# Health damage score: each injury counts once, each fatality ten times.
data_tbl_health <- mutate(
  data_tbl_health,
  Damage = INJURIES + 10 * FATALITIES
)
library(ggplot2)
# Average the health damage score within each event type, sort from highest
# to lowest and keep the first ten rows.
data_tbl_health_summary <- data_tbl_health %>%
  group_by(EVTYPE) %>%
  summarise(mean_damage = mean(Damage)) %>%
  arrange(desc(mean_damage)) %>%
  slice(1:10)

# Bar chart of those event types, ordered so the tallest bar comes first.
ggplot(
  data_tbl_health_summary,
  aes(x = reorder(EVTYPE, -mean_damage), y = mean_damage)
) +
  geom_col() +
  labs(
    x = "Event Type",
    y = "Mean Health Damage",
    title = "Top Events by Mean Health Damage"
  ) +
  theme(
    axis.text.x = element_text(angle = 60, face = "bold", size = 6, hjust = 1)
  )
We can conclude that tornadoes are the most significant event type in terms of mean health damage, followed by cold and snow.
# Frequency of each property-damage exponent code.
print(table(data_tbl_economic[["PROPDMGEXP"]]))
##
## - ? + 0 1 2 3 4 5 6
## 465934 1 8 5 216 25 13 4 4 28 4
## 7 8 B h H K m M
## 5 1 40 1 6 424665 7 11330
# Frequency of each crop-damage exponent code.
print(table(data_tbl_economic[["CROPDMGEXP"]]))
##
## ? 0 2 B k K m M
## 618413 7 19 1 9 21 281832 1 1994
There is a huge number of empty cells. The “k” exponent code is also very common.
library(stringr)
# Fold both exponent columns to lower case so "K"/"k" and "M"/"m" are
# treated as the same code.
exp_columns <- c("CROPDMGEXP", "PROPDMGEXP")
data_tbl_economic[exp_columns] <- lapply(data_tbl_economic[exp_columns], tolower)
# Translate damage exponent codes into numeric multipliers.
#
# Args:
#   x: a data frame or tibble whose SECOND column holds the exponent codes
#      (e.g. PROPDMGEXP or CROPDMGEXP). The first column is not used.
#
# Returns:
#   A numeric vector with one multiplier per row of `x`:
#     "k" -> 1e3, "h" -> 1e2, "m" -> 1e6, "b" -> 1e9, "+" -> 1,
#     any digit "0".."8" -> 10,
#     everything else (empty, "-", "?", NA) -> 0.
#   Codes are matched case-insensitively.
DMG_EXP <- function(x) {
  # Lower-case here as well, so the function is also correct on raw data.
  codes <- tolower(as.character(x[[2]]))

  multiplier <- c("k" = 1e3, "h" = 1e2, "m" = 1e6, "b" = 1e9, "+" = 1)
  # Every digit code maps to 10. "0" is included: it was previously left out
  # and fell through to a multiplier of zero, unlike the other digits.
  multiplier[as.character(0:8)] <- 10

  # A single vectorised lookup replaces the per-row loop; match() returns NA
  # for unknown or empty codes, which are then given a zero multiplier.
  EXP_num <- unname(multiplier[match(codes, names(multiplier))])
  EXP_num[is.na(EXP_num)] <- 0
  EXP_num
}
# Calculate the damage in dollars: scale each reported amount by the
# multiplier for its exponent code, then add property and crop damage.
data_tbl_economic <- mutate(
  data_tbl_economic,
  PROP = PROPDMG * DMG_EXP(data_tbl_economic[c("PROPDMG", "PROPDMGEXP")]),
  CROP = CROPDMG * DMG_EXP(data_tbl_economic[c("CROPDMG", "CROPDMGEXP")]),
  Damage = PROP + CROP
)
# Average the economic damage within each event type (converted to millions
# of USD), sort from highest to lowest and keep the first ten rows.
data_tbl_economic_summary <- data_tbl_economic %>%
  group_by(EVTYPE) %>%
  summarise(mean_damage = mean(Damage) / 1e6) %>%
  arrange(desc(mean_damage)) %>%
  slice(1:10)

# Bar chart of those event types, ordered so the tallest bar comes first.
# The y-axis label now closes its unit bracket.
ggplot(
  data_tbl_economic_summary,
  aes(x = reorder(EVTYPE, -mean_damage), y = mean_damage)
) +
  geom_bar(stat = "identity") +
  labs(
    x = "Event Type",
    y = "Economic Damage [million USD]",
    title = "Top Events by Mean Economic Damage"
  ) +
  theme(
    axis.text.x = element_text(angle = 60, face = "bold", size = 8, hjust = 1)
  )
We can conclude that tornadoes are the most significant event type in terms of mean economic damage, followed by heavy rain and severe weather.