This study examines severe weather events, which cause both public health and economic problems for communities and municipalities in the United States. It is based on data from the National Oceanic and Atmospheric Administration (NOAA) covering the period from 1950 to November 2011. The conclusions of the study are as follows:
Tornadoes are the most harmful events with respect to public health, in terms of both fatalities and injuries.
Floods are the most harmful events with respect to the economy, in terms of combined property and crop damage.
The following code loads the required libraries and downloads the data from the source:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
# Fetch the compressed NOAA storm data and load it into `data`.
fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destfile <- "./StormData.csv.bz2"
# Skip the download when the archive is already on disk so that re-running
# the analysis does not fetch the file again. mode = "wb" requests a binary
# transfer, which matters on Windows for compressed files.
if (!file.exists(destfile)) {
  download.file(fileURL, destfile, mode = "wb")
}
# read.csv() is given the .bz2 archive directly, as in the original code.
data <- read.csv(destfile)
head(data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
# Health impact per event type: total fatalities, total injuries and their
# sum, keeping the five event types with the largest combined count.
# Columns are selected by name rather than by position (8, 23, 24) so the
# selection cannot silently pick the wrong columns if the layout changes.
data.h <- select(data, EVTYPE, FATALITIES, INJURIES)
sum.data.h <- data.h %>%
  group_by(EVTYPE) %>%
  summarise(
    Sum.FAT = sum(FATALITIES, na.rm = TRUE),
    Sum.INJ = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(Total.Health = Sum.FAT + Sum.INJ) %>%
  arrange(desc(Total.Health)) %>%
  top_n(n = 5, wt = Total.Health)
The following code determines the event types that have the greatest economic consequences.
# Economic impact per event type: property and crop damage converted to
# absolute amounts, summed per event type, keeping the five costliest types.
# Columns are selected by name rather than by position (8, 25:28).
data.e <- select(data, EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

# Conversion table from exponent code to multiplier. It is written out
# explicitly instead of being derived from sort(unique(PROPDMGEXP)):
#   * the positional pairing depended on the locale's sort order, and
#   * codes that occur only in CROPDMGEXP had no entry, so their damage became
#     NA and was silently dropped by na.rm = TRUE below.
# The multipliers are the ones the original table assigned; "k" is added with
# the same multiplier as "K" (presumably present in CROPDMGEXP only - verify
# against the data).
symbol <- c("", "-", "?", "+", as.character(0:8),
            "B", "h", "H", "k", "K", "m", "M")
exp <- c(rep(0, 3), 1, rep(10, 9),
         1e9, rep(1e2, 2), rep(1e3, 2), rep(1e6, 2))
conv.tab <- data.frame(symbol, exp, stringsAsFactors = FALSE)

# Report any exponent code that still has no multiplier instead of letting it
# vanish from the totals.
unmapped <- setdiff(
  unique(c(as.character(data.e$PROPDMGEXP), as.character(data.e$CROPDMGEXP))),
  conv.tab$symbol
)
if (length(unmapped) > 0) {
  warning("Damage exponent codes without a multiplier: ",
          paste(unmapped, collapse = ", "), call. = FALSE)
}

data.e <- data.e %>%
  mutate(
    PROPDMGCONV = conv.tab$exp[match(as.character(PROPDMGEXP), conv.tab$symbol)],
    CROPDMGCONV = conv.tab$exp[match(as.character(CROPDMGEXP), conv.tab$symbol)],
    PROPDMG = PROPDMG * PROPDMGCONV,
    CROPDMG = CROPDMG * CROPDMGCONV,
    TOTALDMG = PROPDMG + CROPDMG
  )

sum.data.e <- data.e %>%
  group_by(EVTYPE) %>%
  summarise(
    Sum.PROP = sum(PROPDMG, na.rm = TRUE),
    Sum.CROP = sum(CROPDMG, na.rm = TRUE)
  ) %>%
  mutate(Total.ECON = Sum.PROP + Sum.CROP) %>%
  arrange(desc(Total.ECON)) %>%
  top_n(n = 5, wt = Total.ECON)
# Display the five event types with the largest combined fatalities and injuries.
print(sum.data.h)
## # A tibble: 5 x 4
## EVTYPE Sum.FAT Sum.INJ Total.Health
## <fct> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
# Reshape the top-5 health summary to long form (one row per event type and
# harm type) and draw it as a stacked bar chart.
health <- sum.data.h %>%
  gather("Sum.FAT", "Sum.INJ", key = "H.Type", value = "Quantity",
         factor_key = TRUE) %>%
  select(-Total.Health)

# `fill` belongs inside aes(); passed to ggplot() as a separate argument it
# was silently ignored. reorder() puts the event types in descending order of
# Quantity along the x axis.
plot1 <- ggplot(
  health,
  aes(x = reorder(EVTYPE, -Quantity), y = Quantity, fill = H.Type)
)
# geom_col() draws bars whose heights are the data values, equivalent to
# geom_bar(stat = "identity").
plot1 +
  geom_col(position = "stack") +
  labs(
    title = "Types of events that most harmful \n with respect to population health across the United States",
    x = "Event type",
    y = "Number of population"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_fill_discrete(name = "", labels = c("Fatalities", "Injuries"))
# Display the five event types with the largest combined property and crop damage.
print(sum.data.e)
## # A tibble: 5 x 4
## EVTYPE Sum.PROP Sum.CROP Total.ECON
## <fct> <dbl> <dbl> <dbl>
## 1 FLOOD 144657709800 5661968450 150319678250
## 2 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3 TORNADO 56937162897 414954710 57352117607
## 4 STORM SURGE 43323536000 5000 43323541000
## 5 HAIL 15732269877 3025537650 18757807527
# Reshape the top-5 economic summary to long form (one row per event type and
# damage type) and draw it as a stacked bar chart.
econ <- sum.data.e %>%
  gather("Sum.PROP", "Sum.CROP", key = "E.Type", value = "Amount",
         factor_key = TRUE) %>%
  select(-Total.ECON)

# `fill` belongs inside aes(); passed to ggplot() as a separate argument it
# was silently ignored. Amounts are divided by 1e9 so the y axis reads in
# billions. reorder() puts the event types in descending order of Amount.
plot2 <- ggplot(
  econ,
  aes(x = reorder(EVTYPE, -Amount), y = Amount / 1e9, fill = E.Type)
)
# geom_col() draws bars whose heights are the data values, equivalent to
# geom_bar(stat = "identity"). The y-axis label corrects the "Billon" typo.
plot2 +
  geom_col(position = "stack") +
  labs(
    title = "Types of events that have the greatest \n economic consequences across the United States",
    x = "Event type",
    y = "Cost of damages: Billion USD"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_fill_discrete(name = "", labels = c("Properties", "Crops"))