## R Version, Platform and OS used for development and testing
R version 4.0.3 (2020-10-10) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 19042)
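The basic goal of this assignment is to explore the NOAA Storm Database and answer the following questions: (1) which types of events are most harmful to population health, measured by fatalities and injuries, and (2) which types of events have the greatest economic consequences, measured by property and crop damage?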
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggplot2)
# Download the compressed storm data only when no local copy exists yet,
# so that repeated runs reuse the file already on disk.
if (!file.exists("repdata_data_StormData.csv.bz2")) {
  url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # Binary mode keeps the bz2 archive intact on Windows.
  download.file(
    url,
    destfile = "repdata_data_StormData.csv.bz2",
    mode = "wb"
  )
}

# Read the CSV straight from the bz2 archive, keeping text columns as
# character vectors rather than factors.
stormdata <- read.csv(
  bzfile("repdata_data_StormData.csv.bz2"),
  header = TRUE,
  stringsAsFactors = FALSE
)
# Select needed columns
data <- stormdata[, c("EVTYPE", "FATALITIES", "INJURIES",
                      "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Convert a damage amount and its magnitude code into a plain dollar value.
#
# amount:   numeric vector of damage figures (PROPDMG / CROPDMG).
# exponent: character vector of magnitude codes, same length as `amount`.
#
# Returns a numeric vector the same length as `amount`. Codes H, K, M and B
# scale the amount by 100, 1,000, 10^6 and 10^9 respectively; rows with any
# other code (including the empty string) are set to 0, as before.
#
# Two deliberate differences from the previous copy-pasted assignments:
#   * "H" on crop damage used to be multiplied by 1000 while "H" on property
#     damage was multiplied by 100; both now use 100 (hundreds).
#   * Codes are matched case-insensitively, so "k" or "m" are scaled like
#     "K" or "M" instead of being silently zeroed.
#     NOTE(review): assumes lower-case codes mean the same as upper-case
#     ones in this dataset -- confirm against the data documentation.
scale_damage <- function(amount, exponent) {
  multipliers <- c(H = 100, K = 1000, M = 10^6, B = 10^9)
  row_multiplier <- unname(
    multipliers[match(toupper(exponent), names(multipliers))]
  )
  # Start from 0 so that unrecognised codes contribute nothing to the totals.
  scaled <- numeric(length(amount))
  known <- !is.na(row_multiplier)
  scaled[known] <- amount[known] * row_multiplier[known]
  scaled
}

# Add scaled damage columns for convenience
data$CROPDMG_ <- scale_damage(data$CROPDMG, data$CROPDMGEXP)
data$PROPDMG_ <- scale_damage(data$PROPDMG, data$PROPDMGEXP)
# Total fatalities per event type, keeping the ten largest totals.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = data, FUN = sum)
fatalities <- fatalities[order(-fatalities$FATALITIES)[1:10], ]

# Fix the factor levels in ranked order so the bars are drawn from the
# largest total to the smallest instead of alphabetically.
fatalities[["EVTYPE"]] <- factor(
  fatalities[["EVTYPE"]],
  levels = fatalities[["EVTYPE"]]
)

# Bar chart of the ranked totals; x labels are rotated to stay readable.
ggplot(fatalities, aes(x = EVTYPE, y = FATALITIES)) +
  geom_col(fill = "blue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "", y = "Fatalities", title = "Fatalities by Events")
# Total injuries per event type, keeping the ten largest totals.
injuries <- aggregate(INJURIES ~ EVTYPE, data = data, FUN = sum)
injuries <- injuries[order(-injuries$INJURIES)[1:10], ]

# Fix the factor levels in ranked order so the bars are drawn from the
# largest total to the smallest instead of alphabetically.
injuries[["EVTYPE"]] <- factor(
  injuries[["EVTYPE"]],
  levels = injuries[["EVTYPE"]]
)

# Bar chart of the ranked totals; x labels are rotated to stay readable.
ggplot(injuries, aes(x = EVTYPE, y = INJURIES)) +
  geom_col(fill = "blue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "", y = "Injuries", title = "Injuries by Events")
# Combined property and crop damage per event type. The aggregated column
# gets an expression-derived name, so both columns are renamed explicitly.
damages <- setNames(
  aggregate(PROPDMG_ + CROPDMG_ ~ EVTYPE, data = data, FUN = sum),
  c("EVTYPE", "DAMAGES")
)

# Keep the ten event types with the largest combined damage.
damages <- damages[order(-damages$DAMAGES)[1:10], ]

# Fix the factor levels in ranked order so the bars are drawn from the
# largest total to the smallest instead of alphabetically.
damages[["EVTYPE"]] <- factor(
  damages[["EVTYPE"]],
  levels = damages[["EVTYPE"]]
)

# Bar chart of the ranked totals; x labels are rotated to stay readable.
ggplot(damages, aes(x = EVTYPE, y = DAMAGES)) +
  geom_col(fill = "blue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "", y = "Damages ($)", title = "Property & Crop Damages by Events")
By inspection, we see that:
1. Tornado events are the most harmful, and excessive heat is the second most harmful, in terms of number of fatalities.
2. Tornado events are the most harmful, and excessive heat is the second most harmful, in terms of number of injuries.
3. Flood damages have the greatest economic impact.