R version 4.0.3 (2020-10-10) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 19042)

Synopsis

The basic goal of this assignment is to explore the NOAA Storm Database and answer the following questions:

Across the United States, which types of events (as indicated by the EVTYPE variable) are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?

Loading libraries

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(ggplot2)

Loading and defining storm data

# Fetch the compressed NOAA storm data set only when no local copy exists,
# so that re-running the analysis does not download it again.
if (!file.exists("repdata_data_StormData.csv.bz2")) {
  url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # mode = "wb" writes the archive in binary mode.
  download.file(url, destfile = "repdata_data_StormData.csv.bz2", mode = "wb")
}

# Read the CSV through a bzfile() connection; text columns are kept as
# character vectors rather than converted to factors.
stormdata <- read.csv(
  bzfile("repdata_data_StormData.csv.bz2"),
  header = TRUE,
  stringsAsFactors = FALSE
)

Preparing data for analysis

# Select needed columns
data <- stormdata[, c("EVTYPE", "FATALITIES", "INJURIES",
                      "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Convert a damage amount and its exponent code into a dollar value.
#
# amount   Numeric vector of damage figures (PROPDMG / CROPDMG).
# exponent Vector of exponent codes (PROPDMGEXP / CROPDMGEXP), same length.
#
# Returns a numeric vector of the same length. Codes H, K, M and B scale the
# amount by 10^2, 10^3, 10^6 and 10^9. Codes are matched case-insensitively,
# so a lower-case code (if present in the raw data) is scaled like its
# upper-case form. Rows with any other code (blank, digits, symbols, NA) get 0.
scale_damage <- function(amount, exponent) {
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # Lookup by name yields NA for every code that is not in the table.
  multiplier <- unname(multipliers[toupper(exponent)])
  recognised <- !is.na(multiplier)
  # Start from 0 and fill in only the recognised rows, so an unrecognised
  # code never turns a damage figure into NA.
  dollars <- numeric(length(amount))
  dollars[recognised] <- amount[recognised] * multiplier[recognised]
  dollars
}

# Add scaled damage columns for convenience. Crop damage with code "H" is
# scaled by 100, the same factor used for property damage.
data$CROPDMG_ <- scale_damage(data$CROPDMG, data$CROPDMGEXP)
data$PROPDMG_ <- scale_damage(data$PROPDMG, data$PROPDMGEXP)

Which types of events are most harmful with respect to population health?

# Sum fatalities per event type, then keep the ten largest totals.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = data, FUN = sum)
fatalities <- fatalities[order(-fatalities$FATALITIES)[1:10], ]
# Use the ranked order as the factor levels so the bars are drawn from the
# highest total to the lowest instead of alphabetically.
fatalities$EVTYPE <- with(fatalities, factor(EVTYPE, levels = EVTYPE))

# Bar chart of the ten event types with the most fatalities.
ggplot(fatalities, aes(x = EVTYPE, y = FATALITIES)) +
  geom_col(fill = "blue") +
  labs(x = "", y = "Fatalities", title = "Fatalities by Events") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Sum injuries per event type, then keep the ten largest totals.
injuries <- aggregate(INJURIES ~ EVTYPE, data = data, FUN = sum)
injuries <- injuries[order(-injuries$INJURIES)[1:10], ]
# Use the ranked order as the factor levels so the x axis follows the ranking
# rather than alphabetical order.
injuries$EVTYPE <- with(injuries, factor(EVTYPE, levels = EVTYPE))

# Bar chart of the ten event types with the most injuries.
ggplot(injuries, aes(x = EVTYPE, y = INJURIES)) +
  geom_col(fill = "blue") +
  labs(x = "", y = "Injuries", title = "Injuries by Events") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Across the United States, which types of events have the greatest economic consequences?

# Sum property plus crop damage per event type. The aggregated column is
# renamed to DAMAGES because the formula's left-hand side is an expression.
damages <- setNames(
  aggregate(PROPDMG_ + CROPDMG_ ~ EVTYPE, data = data, FUN = sum),
  c("EVTYPE", "DAMAGES")
)
# Keep the ten event types with the largest combined damage.
damages <- damages[order(-damages$DAMAGES)[1:10], ]
# Use the ranked order as the factor levels so the bars follow the ranking.
damages$EVTYPE <- with(damages, factor(EVTYPE, levels = EVTYPE))

# Bar chart of the ten costliest event types.
ggplot(damages, aes(x = EVTYPE, y = DAMAGES)) +
  geom_col(fill = "blue") +
  labs(x = "", y = "Damages ($)", title = "Property & Crop Damages by Events") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Results

By inspection, we see that:

1. Tornado events are the most harmful, and excessive heat is the second most harmful, in terms of number of fatalities.
2. Tornado events are the most harmful, and excessive heat is the second most harmful, in terms of number of injuries.
3. Flood damages have the greatest economic impact.

Analysis of Events from NOAA Storm Database

Stephen Bique

1/4/2021