This is an analysis of the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. It addresses major storms and weather events in the United States from 1950 to 2011 in terms of fatalities, injuries, and property and crop damage. The results show that tornadoes are the most harmful to population health, and floods have the greatest economic consequences.
#load packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Download the raw NOAA storm data only if it is not already on disk, so
# re-running the analysis does not fetch the archive again.
# mode = "wb" forces a binary transfer; the file is a bzip2 archive and must
# not be written in text mode.
if (!file.exists("storm.data")) {
  download.file(
    "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    "storm.data",
    mode = "wb"
  )
}
# Read the data (read.csv is pointed at the downloaded archive directly).
storm.data <- read.csv("storm.data")
# Keep only the columns the analysis uses:
#   - EVTYPE, FATALITIES, INJURIES: event type and the health outcomes
#   - every column whose name contains "DMG": the damage-related columns
storm <- select(storm.data, EVTYPE, FATALITIES, INJURIES, contains("DMG"))
# Create prop.dmg: property damage scaled by the exponent code in PROPDMGEXP
# (H = hundreds, K = thousands, M = millions, B = billions).
# Codes are compared case-insensitively so "k"/"m"/"h"/"b" are scaled too.
storm <- storm %>%
  mutate(
    prop.dmg = case_when(
      toupper(PROPDMGEXP) == "H" ~ PROPDMG * 10^2,
      toupper(PROPDMGEXP) == "K" ~ PROPDMG * 10^3,
      toupper(PROPDMGEXP) == "M" ~ PROPDMG * 10^6,
      toupper(PROPDMGEXP) == "B" ~ PROPDMG * 10^9,
      # Without a fallback, every other code (blank, digits, symbols) yields
      # NA, and a single NA makes a later sum() for that event type NA.
      # Keep the recorded figure unscaled instead.
      # NOTE(review): confirm that "no scaling" is the intended meaning of
      # the non-letter codes.
      TRUE ~ PROPDMG * 10^0
    )
  )
# View the distribution of the converted values.
storm %>%
  group_by(prop.dmg) %>%
  count()
## # A tibble: 1,655 × 2
## # Groups: prop.dmg [1,655]
## prop.dmg n
## <dbl> <int>
## 1 0 197195
## 2 10 850
## 3 20 46
## 4 30 1599
## 5 40 2
## 6 50 557
## 7 60 21
## 8 70 3
## 9 100 1426
## 10 110 27
## # … with 1,645 more rows
# Create crop.dmg: crop damage scaled by the exponent code in CROPDMGEXP
# (H = hundreds, K = thousands, M = millions, B = billions).
# Codes are compared case-insensitively so "k"/"m"/"h"/"b" are scaled too.
storm <- storm %>%
  mutate(
    crop.dmg = case_when(
      toupper(CROPDMGEXP) == "H" ~ CROPDMG * 10^2,
      toupper(CROPDMGEXP) == "K" ~ CROPDMG * 10^3,
      toupper(CROPDMGEXP) == "M" ~ CROPDMG * 10^6,
      toupper(CROPDMGEXP) == "B" ~ CROPDMG * 10^9,
      # Without a fallback, every other code (blank, digits, symbols) yields
      # NA, and a single NA makes a later sum() for that event type NA.
      # Keep the recorded figure unscaled instead.
      # NOTE(review): confirm that "no scaling" is the intended meaning of
      # the non-letter codes.
      TRUE ~ CROPDMG * 10^0
    )
  )
# View the distribution of the converted values.
storm %>%
  group_by(crop.dmg) %>%
  count()
## # A tibble: 524 × 2
## # Groups: crop.dmg [524]
## crop.dmg n
## <dbl> <int>
## 1 0 261773
## 2 10 1
## 3 50 156
## 4 100 13
## 5 150 2
## 6 200 25
## 7 240 1
## 8 250 3
## 9 280 1
## 10 300 7
## # … with 514 more rows
# Total casualties (fatalities + injuries) per event type, largest first.
health <- storm %>%
  group_by(EVTYPE) %>%
  summarise(sum.pop = sum(FATALITIES + INJURIES)) %>%
  arrange(desc(sum.pop))
library(ggplot2)
# Bar chart of the five event types with the most casualties.
# - head() is used instead of [1:5, ] so fewer than five groups does not
#   produce NA rows.
# - reorder() keeps the bars in ranked order; ggplot would otherwise sort the
#   event types alphabetically.
# - `las` is a base-graphics parameter that ggplot2 ignores with a warning;
#   vertical axis labels are set through theme() instead.
head(health, 5) %>%
  ggplot(aes(x = reorder(EVTYPE, -sum.pop), y = sum.pop)) +
  geom_bar(stat = "identity", fill = "blue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Fatalities and injuries") +
  ggtitle("Fatalities and injuries by top 5 Weather Events")
From the barplot, tornadoes cause the highest combined number of fatalities and injuries.
# Total economic damage (property + crop) per event type, largest first.
# prop.dmg / crop.dmg are NA wherever the exponent code was not recognised;
# coalesce() counts those as 0 so a single missing value does not turn the
# whole event type's total into NA.
econ <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    sum.econ = sum(coalesce(prop.dmg, 0) + coalesce(crop.dmg, 0))
  ) %>%
  arrange(desc(sum.econ))
library(ggplot2)
# Bar chart of the five event types with the largest damage totals.
# - head() is used instead of [1:5, ] so fewer than five groups does not
#   produce NA rows.
# - reorder() keeps the bars in ranked order; ggplot would otherwise sort the
#   event types alphabetically.
# - `las` is a base-graphics parameter that ggplot2 ignores with a warning,
#   so it is dropped; theme() already rotates the axis labels.
head(econ, 5) %>%
  ggplot(aes(x = reorder(EVTYPE, -sum.econ), y = sum.econ)) +
  geom_bar(stat = "identity", fill = "blue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Damages ($)") +
  ggtitle("Property & Crop Damages by top 5 Weather Events")
From the barplot, floods have the greatest negative economic impact (i.e., the largest monetary loss).