This data analysis involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
This report addresses two questions:
1. Across the United States, which types of events are most harmful with respect to population health?
2. Across the United States, which types of events have the greatest economic consequences?
I read the data from a local file instead of calling download.file() because my network connection is very slow: every time I tried download.file(), it stalled and only part of the file was downloaded. I therefore downloaded the file with Chrome into my local repository first and then loaded it with read.csv(). The data can be found here.
library(knitr)
library(readr)
# Load the raw NOAA storm data from the local copy of the CSV file.
# stringsAsFactors = FALSE is spelled out because the cleaning steps further
# down overwrite text codes in place, which only works on character columns;
# R versions before 4.0 would otherwise read those columns as factors.
# (sep = "," is already the read.csv() default, so it is omitted.)
sdata <- read.csv("StormData.csv", stringsAsFactors = FALSE)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Parse the event start timestamp, keep only events that began on or after
# 1 January 1996, and retain just the columns used in the analysis.
#
# mdy_hms() produces UTC date-times by default, so the cutoff is constructed
# explicitly in UTC. Comparing against the bare string "1996-01-01" would have
# R parse it in the session's local time zone, shifting the cutoff by the
# machine's UTC offset and making the set of retained rows depend on where the
# script is run.
st <- sdata %>%
  mutate(BGN_DATE = mdy_hms(BGN_DATE)) %>%
  filter(BGN_DATE >= as.POSIXct("1996-01-01", tz = "UTC")) %>%
  select(
    BGN_DATE, EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  )
# Convert damage-exponent codes into numeric multipliers.
#
#   "B" / "M" / "K" (any case) -> 10^9 / 10^6 / 10^3
#   ""  (blank)                -> 0, i.e. the damage figure is not counted
#   numeric strings (e.g. "0") -> their numeric value
#   anything else, or NA       -> 0, with a warning listing the codes
#
# Args:
#   code: vector of exponent codes (character or factor).
# Returns:
#   Numeric vector of multipliers, the same length as `code`, without NAs.
exp_to_multiplier <- function(code) {
  code <- toupper(as.character(code))
  letter_mult <- c(B = 10^9, M = 10^6, K = 10^3)

  # Name-based lookup; codes that are not B/M/K come back as NA here.
  mult <- unname(letter_mult[code])
  mult[!is.na(code) & code == ""] <- 0

  # Codes that are plain numbers keep their numeric value.
  pending <- is.na(mult) & !is.na(code)
  mult[pending] <- suppressWarnings(as.numeric(code[pending]))

  # Anything still unresolved would turn every later sum() into NA, so it is
  # zeroed out explicitly and reported instead of being carried along silently.
  unknown <- is.na(mult)
  if (any(unknown)) {
    warning(
      "Unrecognised damage exponent code(s) treated as 0: ",
      paste(unique(code[unknown]), collapse = ", "),
      call. = FALSE
    )
    mult[unknown] <- 0
  }
  mult
}

# Replace the exponent codes of both damage columns with numeric multipliers.
st <- st %>%
  mutate(
    PROPDMGEXP = exp_to_multiplier(PROPDMGEXP),
    CROPDMGEXP = exp_to_multiplier(CROPDMGEXP)
  )
# Switch to lower-case column names, then derive the two loss measures:
#   hlthloss  = fatalities + 0.3 * injuries
#   ecnmcloss = property damage + crop damage, each scaled by its multiplier
# Only the identifying columns and the two derived measures are kept.
colnames(st) <- tolower(colnames(st))
st <- st %>%
  mutate(
    ecnmcloss = propdmg * propdmgexp + cropdmg * cropdmgexp,
    hlthloss = fatalities + 0.3 * injuries
  ) %>%
  select(bgn_date, evtype, fatalities, injuries, ecnmcloss, hlthloss)
# Load the reference table of event names from the local CSV file and show
# its first and last five rows.
events48 <- read.csv(file = "events.csv")
head(events48, n = 5)
tail(events48, n = 5)
library(stringr)
library(stringdist)
## Warning: package 'stringdist' was built under R version 4.0.2
# Normalise the raw event labels: title case, with the "Tstm" abbreviation
# expanded to "Thunderstorm".
st$evtype <- gsub("Tstm", "Thunderstorm", str_to_title(st$evtype))

# Assign every record the closest entry of events48$events by approximate
# string matching. maxDist = 100 is large enough that, in practice, every
# label is mapped to some entry, however distant.
st$evclass <- events48$events[
  amatch(st$evtype, events48$events, maxDist = 100)
]
library(ggplot2)
# Total health loss per event class, largest first.
ev_hlth <- st %>%
  group_by(event = evclass) %>%
  summarize(health_loss = sum(hlthloss)) %>%
  arrange(desc(health_loss))

# First five rows of the ranked table.
ev_hlth_top5 <- ev_hlth[seq_len(5), ]

# Bar chart of the five classes, bars ordered by decreasing loss and each
# labelled with its value.
g1 <- ggplot(
  ev_hlth_top5,
  aes(x = reorder(event, -health_loss), y = health_loss)
)
g1 +
  geom_col(fill = "steelblue") +
  geom_text(aes(label = health_loss), vjust = -0.3, size = 3) +
  labs(
    x = "Event",
    y = "Loss",
    title = "Population Health Loss v. Severe Weather Event in US Since 1996",
    subtitle = "Note: Loss = 1 * fatalities + 0.3 * Injuries"
  ) +
  theme_classic()
library(ggplot2)
# Total economic loss per event class, largest first.
ev_eco <- st %>%
  group_by(event = evclass) %>%
  summarize(economic_loss = sum(ecnmcloss)) %>%
  arrange(desc(economic_loss))

# First five rows of the ranked table, with the loss divided by 10^9 and
# rounded to one decimal for display.
ev_eco_top5 <- ev_eco[seq_len(5), ]
ev_eco_top5$scl_eco_loss <- round(ev_eco_top5$economic_loss / 10^9, digits = 1)

# Bar chart of the five classes, bars ordered by decreasing loss and each
# labelled with its scaled value.
g1 <- ggplot(
  ev_eco_top5,
  aes(x = reorder(event, -scl_eco_loss), y = scl_eco_loss)
)
g1 +
  geom_col(fill = "brown") +
  geom_text(aes(label = scl_eco_loss), vjust = -0.3, size = 3) +
  labs(
    x = "Event",
    y = "Loss",
    title = "Economic Loss v. Severe Weather Event in US Since 1996",
    subtitle = "Note: Economic Loss = Property Loss + Crop Loss (in billions)"
  ) +
  theme_classic()