Loading packages
# Enable knitr chunk caching for every chunk in this report.
knitr::opts_chunk$set(cache = TRUE)
library(ggplot2)
library(reshape2)
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Loading the data
# Read the raw storm data from the bzip2-compressed CSV in the working directory.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
data_NOAA <- read.csv("repdata-data-StormData.csv.bz2", sep = ",", header = TRUE)
Subsetting data
# Keep only the event type plus the health and damage columns used below.
data_NOAA <- data_NOAA %>%
  select(
    "EVTYPE",
    "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP"
  )
Cleaning data
# Translate damage-exponent codes into numeric multipliers.
#
# exp_code: vector (character or factor) of PROPDMGEXP / CROPDMGEXP codes.
# Returns a numeric vector of the same length:
#   "H" / "K" / "M" / "B" (either case) -> 1e2 / 1e3 / 1e6 / 1e9
#   a single digit n                    -> 10^n (the code is treated as an
#                                          exponent, not as a literal factor)
#   anything else (e.g. "", "?", NA)    -> 0, which zeroes the damage figure
damage_multiplier <- function(exp_code) {
  code <- toupper(as.character(exp_code))
  multiplier <- numeric(length(code)) # every code starts at 0

  letter_powers <- c(H = 2, K = 3, M = 6, B = 9)
  is_letter <- code %in% names(letter_powers)
  multiplier[is_letter] <- unname(10^letter_powers[code[is_letter]])

  is_digit <- grepl("^[0-9]$", code)
  multiplier[is_digit] <- 10^as.numeric(code[is_digit])

  multiplier
}

# Scale both damage columns by their exponent codes, then drop the exponent
# columns, which are no longer needed once the amounts are expressed directly.
data_cleaned <- data_NOAA %>%
  mutate(
    CROPDMG = CROPDMG * damage_multiplier(CROPDMGEXP),
    PROPDMG = PROPDMG * damage_multiplier(PROPDMGEXP),
    CROPDMGEXP = NULL,
    PROPDMGEXP = NULL
  )
# Reshape to long format: one row per (EVTYPE, category) pair, where `category`
# holds the former column name and `value` its number. Rows with NA values are
# dropped. `id.vars` is spelled out instead of relying on partial matching of `id`.
data_final <- melt(
  data_cleaned,
  id.vars = "EVTYPE",
  variable.name = "category",
  value.name = "value",
  na.rm = TRUE
)
1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
# Total fatalities per event type, reduced to the ten largest totals.
fatalities <- data_final[data_final$category == "FATALITIES", ]
fat_nr <- aggregate(value ~ EVTYPE, data = fatalities, FUN = sum)
# head() rather than [1:10, ]: with fewer than ten event types the index form
# would pad the result with all-NA rows.
fat_nr <- head(fat_nr[order(-fat_nr$value), ], 10)
# Set the factor levels to the ranked order so the plot keeps this ordering.
fat_nr$EVTYPE <- factor(fat_nr$EVTYPE, levels = fat_nr$EVTYPE)
# Bar chart of the fatality totals held in fat_nr, one bar per event type
ggplot(data = fat_nr, mapping = aes(x = EVTYPE, y = value)) +
  geom_col(fill = "red") +
  labs(
    x = "Event Type",
    y = "Fatalities",
    title = "Number of fatalities for the top 10 weather events"
  ) +
  # Rotate the event names so they do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Total injuries per event type, reduced to the ten largest totals.
injuries <- data_final[data_final$category == "INJURIES", ]
inj_nr <- aggregate(value ~ EVTYPE, data = injuries, FUN = sum)
# head() rather than [1:10, ]: with fewer than ten event types the index form
# would pad the result with all-NA rows.
inj_nr <- head(inj_nr[order(-inj_nr$value), ], 10)
# Set the factor levels to the ranked order so the plot keeps this ordering.
inj_nr$EVTYPE <- factor(inj_nr$EVTYPE, levels = inj_nr$EVTYPE)
# Bar chart of the injury totals held in inj_nr, one bar per event type
ggplot(data = inj_nr, mapping = aes(x = EVTYPE, y = value)) +
  geom_col(fill = "red") +
  labs(
    x = "Event Type",
    y = "Injuries",
    title = "Number of injuries for the top 10 weather events"
  ) +
  # Rotate the event names so they do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
2. Across the United States, which types of events have the greatest economic consequences?
# Combined property and crop damage per event type, reduced to the ten largest.
# Only the two damage categories are kept: matching on the literal string
# "PROPDMGNUM|CROPDMGNUM" selected nothing, and aggregating the whole of
# data_final mixed fatality and injury counts into the damage totals.
damages <- data_final[data_final$category %in% c("PROPDMG", "CROPDMG"), ]
dam_nr <- aggregate(value ~ EVTYPE, data = damages, FUN = sum)
names(dam_nr) <- c("EVTYPE", "DAMAGE")
# head() rather than [1:10, ]: with fewer than ten event types the index form
# would pad the result with all-NA rows.
dam_nr <- head(dam_nr[order(-dam_nr$DAMAGE), ], 10)
# Set the factor levels to the ranked order so the plot keeps this ordering.
dam_nr$EVTYPE <- factor(dam_nr$EVTYPE, levels = dam_nr$EVTYPE)
# Bar chart of the DAMAGE totals held in dam_nr, one bar per event type
ggplot(data = dam_nr, mapping = aes(x = EVTYPE, y = DAMAGE)) +
  geom_col(fill = "red") +
  labs(
    x = "Event Type",
    y = "Damages",
    title = "Property & Crop damages for the top 10 weather events"
  ) +
  # Rotate the event names so they do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
As the plot shows, floods cause the greatest combined property and crop damage.