Introduction
Severe weather, like storms, can cause serious problems for
communities, from health risks to economic losses. These events often
result in deaths, injuries, and damage to property, so it’s crucial to
find ways to lessen their impact.
In this project, we’re digging into the NOAA storm database, which
records major storms and weather events across the U.S. It includes
information on when and where these events took place, as well as
estimates of the damage they caused, including fatalities, injuries, and
property damage.
Appendix
library(curl)
library(readr)
library(dplyr)
library(ggplot2)
library(knitr)
library(rmarkdown)
# Download the storm data provided by the course.
# The remote file is a bzip2-compressed CSV, so the local copy keeps the
# `.bz2` extension; read_csv() decompresses files with that extension
# transparently.
data.zip.url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
File.name <- "repdata_data_StormData.csv.bz2"
# Fetch the archive only when no local copy exists, so re-running the
# analysis does not download the whole file again.
if (!file.exists(File.name)) {
  curl_download(data.zip.url, File.name)
}
# Parse the (compressed) CSV into a tibble.
data <- read_csv(File.name)
## Rows: 902297 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): BGN_DATE, BGN_TIME, TIME_ZONE, COUNTYNAME, STATE, EVTYPE, BGN_AZI,...
## dbl (18): STATE__, COUNTY, BGN_RANGE, COUNTY_END, END_RANGE, LENGTH, WIDTH, ...
## lgl (1): COUNTYENDN
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the raw import.
head(data, n = 6L)
## # A tibble: 6 × 37
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE BGN_RANGE
## <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <dbl>
## 1 1 4/18/1950… 0130 CST 97 MOBILE AL TORNA… 0
## 2 1 4/18/1950… 0145 CST 3 BALDWIN AL TORNA… 0
## 3 1 2/20/1951… 1600 CST 57 FAYETTE AL TORNA… 0
## 4 1 6/8/1951 … 0900 CST 89 MADISON AL TORNA… 0
## 5 1 11/15/195… 1500 CST 43 CULLMAN AL TORNA… 0
## 6 1 11/15/195… 2000 CST 77 LAUDERDALE AL TORNA… 0
## # ℹ 28 more variables: BGN_AZI <chr>, BGN_LOCATI <chr>, END_DATE <chr>,
## # END_TIME <chr>, COUNTY_END <dbl>, COUNTYENDN <lgl>, END_RANGE <dbl>,
## # END_AZI <chr>, END_LOCATI <chr>, LENGTH <dbl>, WIDTH <dbl>, F <dbl>,
## # MAG <dbl>, FATALITIES <dbl>, INJURIES <dbl>, PROPDMG <dbl>,
## # PROPDMGEXP <chr>, CROPDMG <dbl>, CROPDMGEXP <chr>, WFO <chr>,
## # STATEOFFIC <chr>, ZONENAMES <chr>, LATITUDE <dbl>, LONGITUDE <dbl>,
## # LATITUDE_E <dbl>, LONGITUDE_ <dbl>, REMARKS <chr>, REFNUM <dbl>
# Keep the raw import under `data` and do all cleaning on `Storms.Data`.
Storms.Data <- data
# Clean up the data so that conclusions will be based on better results.
# First, count the distinct event-type labels before any normalisation:
n_distinct(Storms.Data$EVTYPE)
## [1] 977
# Normalise free-text event-type labels so that spelling variants of the same
# event collapse into one category before aggregation.
#
# event.type: character vector of raw EVTYPE labels.
# Returns a character vector of the same length; missing labels stay NA.
Normalize.Event.Type <- function(event.type) {
  event.type <- toupper(event.type)

  # Drop a trailing plural "S" and a dangling trailing hyphen.
  event.type <- sub("S$", "", event.type)
  event.type <- sub("-$", "", event.type)

  # Expand abbreviations and unify spelling variants.
  event.type <- sub("WND", "WIND", event.type)
  event.type <- sub("TSTM", "THUNDERSTORM", event.type)
  event.type <- sub("WINTRY", "WINTER", event.type)
  event.type <- sub("WINTERY", "WINTER", event.type)
  event.type <- sub("FLOODIN", "FLOOD", event.type)
  event.type <- sub("WATERSPOUT", "WATER SPOUT", event.type)
  event.type <- sub("WILDFIRE", "WILD FIRE", event.type)

  # Turn EVERY slash into a space. sub() would only replace the first one and
  # leave labels such as "A/B/C" with an unsplit "B/C" token, which the word
  # de-duplication below could not see through.
  event.type <- gsub("/", " ", event.type, fixed = TRUE)

  # Collapse whitespace runs first, then trim both ends, so that no stray
  # leading or trailing blank survives the slash replacement.
  event.type <- gsub("\\s+", " ", event.type)
  event.type <- trimws(event.type)

  # Remove trailing qualifiers that do not change the event category.
  event.type <- gsub(" TREE$", "", event.type)
  event.type <- gsub(" MIX$", "", event.type)
  event.type <- gsub(" MIXED$", "", event.type)
  event.type <- gsub(" GORDON$", "", event.type)

  # Drop repeated words inside a label (e.g. "FLOOD FLASH FLOOD" becomes
  # "FLOOD FLASH"). vapply() guarantees a character result, and a missing
  # label is kept as NA instead of being pasted into the string "NA".
  vapply(
    strsplit(event.type, " ", fixed = TRUE),
    function(words) {
      if (anyNA(words)) {
        NA_character_
      } else {
        paste(unique(words), collapse = " ")
      }
    },
    character(1),
    USE.NAMES = FALSE
  )
}

Storms.Data$EVTYPE <- Normalize.Event.Type(Storms.Data$EVTYPE)
# Calculate the Total Economic Damage.
# Both exponent columns are overwritten with numeric multipliers:
# "K" -> 1e3, "M" -> 1e6, "B" -> 1e9 (case-insensitive); any other code,
# including a missing one, counts as a multiplier of 1.
Storms.Data <- Storms.Data %>%
  mutate(
    across(
      c(PROPDMGEXP, CROPDMGEXP),
      function(code) {
        position <- match(toupper(code), c("K", "M", "B"))
        # Unmatched codes give NA here; coalesce() turns them into 1.
        coalesce(c(1e3, 1e6, 1e9)[position], 1)
      }
    ),
    Total.Economic.Damage = PROPDMG * PROPDMGEXP + CROPDMG * CROPDMGEXP
  )
# Calculate the Population Harm, weighting fatalities (x5) more heavily than
# injuries (x3).
Storms.Data <- Storms.Data %>%
  mutate(Weighted.Population.Harm = INJURIES * 3 + FATALITIES * 5)

# Total weighted harm per event type.
# NOTE(review): the output column name `Weigghted.Harm` is misspelled; it is
# kept as-is because code outside this section may refer to it.
Population.Harm <- summarise(
  group_by(Storms.Data, EVTYPE),
  Weigghted.Harm = sum(Weighted.Population.Harm)
)