# Document-wide setup ----
# Strongly penalise scientific notation so large totals print in fixed form.
options(scipen = 999)
# Evaluate every chunk with the data folder as its working directory.
knitr::opts_knit$set(
  root.dir = "C:/Users/luis-alaniz/Desktop/Reproducible Research/Week 4/repdata_2Fdata%2FStormData"
)
# Echo each chunk's source code in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
The analysis describes the damage caused by catastrophic events in terms of their effect on population health and on property (measured in millions of dollars). The source of the data is the National Weather Service. The database spans the period from 1950 to 2011. To evaluate the effects of catastrophic events, the 10 most damaging event types were selected for each metric. They were separated into events that affect population health and events that destroy the population's property. The damage caused by each event type was summed over the entire period. However, no adjustment was made for changes in the cost of living over time for damage measured in dollars, which means that the damage caused by early events in the database is likely underestimated. To produce the analysis, five R packages were used: one for data handling and four for graphics and colors. The results show that the most destructive events in terms of lives lost and injuries are tornadoes. In terms of property damage the most destructive events are tornadoes, and in terms of crop damage the most destructive event is hail.
The following R packages were used:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(RColorBrewer)
library(cowplot)
## Warning: package 'cowplot' was built under R version 3.4.3
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
library(scales)
The root directory was set up using the knitr option root.dir, and the data was then loaded into R using the R command read.csv. The data was then stored in a table data frame named “events”. This table data frame was grouped by event type; the damage from the events was then summed within each group and finally ranked.
# Load the raw storm records (the relative path is resolved against the
# working directory in effect when this chunk runs).
events <- read.csv("repdata%2Fdata%2FStormData")
# as_tibble() replaces tbl_df(), which is deprecated in current dplyr.
events <- as_tibble(events)
# Collapse to one row per event type, total each harm metric, rank the totals
# (min_rank: rank 1 is the smallest total), and keep every event type whose
# rank is within 10 of the maximum rank for at least one metric.
# NOTE(review): PROPDMG and CROPDMG are summed as raw numbers. If the data set
# stores a per-row magnitude next to them (e.g. PROPDMGEXP / CROPDMGEXP
# columns), these totals ignore it -- confirm against the data documentation.
events <- events %>%
  group_by(EVTYPE) %>%
  summarise(
    TOTAL_FATAL = sum(FATALITIES),
    TOTAL_INJURIES = sum(INJURIES),
    TOTAL_PROP_DMG = sum(PROPDMG),
    TOTAL_CROP_DMG = sum(CROPDMG)
  ) %>%
  mutate(
    RANK_TOTAL_FATAL = min_rank(TOTAL_FATAL),
    RANK_TOTAL_INJURIES = min_rank(TOTAL_INJURIES)
  ) %>%
  mutate(
    RANK_PROP_DMG = min_rank(TOTAL_PROP_DMG),
    RANK_CROP_DMG = min_rank(TOTAL_CROP_DMG)
  ) %>%
  filter(
    RANK_TOTAL_FATAL > (max(RANK_TOTAL_FATAL) - 10) |
      RANK_TOTAL_INJURIES > (max(RANK_TOTAL_INJURIES) - 10) |
      RANK_PROP_DMG > (max(RANK_PROP_DMG) - 10) |
      RANK_CROP_DMG > (max(RANK_CROP_DMG) - 10)
  )
Shown below are the 10 weather events with the most fatalities and the most injuries in the period 1950–2011.
# Health tables ----
# Event types whose fatality rank is within 10 of the maximum rank,
# listed from highest rank to lowest.
most_fatal <- events %>%
  filter(max(RANK_TOTAL_FATAL) - RANK_TOTAL_FATAL < 10) %>%
  arrange(desc(RANK_TOTAL_FATAL)) %>%
  select(TOTAL_FATAL, EVTYPE)

# Same selection and ordering, based on the injury rank.
most_injuries <- events %>%
  filter(max(RANK_TOTAL_INJURIES) - RANK_TOTAL_INJURIES < 10) %>%
  arrange(desc(RANK_TOTAL_INJURIES)) %>%
  select(TOTAL_INJURIES, EVTYPE)
# Panel 1: total fatalities per event type, bars ordered by ascending total.
# The original passed `size = n` to labs() as if it were a font size; labs()
# only treats it as the title of the (unused) size aesthetic, so those
# arguments are dropped. Rendered output is unchanged.
xx1 <- ggplot(
  most_fatal,
  aes(x = reorder(EVTYPE, TOTAL_FATAL), y = TOTAL_FATAL, fill = EVTYPE)
) +
  geom_col() + # bar height is the value itself
  # Comma-formatted value label sitting on top of each bar.
  geom_text(
    aes(label = comma(TOTAL_FATAL)),
    vjust = 0, hjust = 0.5, size = 2.5
  ) +
  scale_y_continuous(labels = comma) + # thousands separators on the y axis
  scale_fill_brewer(palette = "Paired") +
  # Leave 10% headroom above the tallest bar so its label is not clipped.
  coord_cartesian(ylim = c(0, 1.1 * max(most_fatal$TOTAL_FATAL))) +
  labs(
    x = "", # no x-axis title
    y = "Fatalities",
    title = "Most Harmful Events to Population Health",
    subtitle = "The 10 Events with Most Fatalities"
  ) +
  theme_minimal() +
  theme(
    # Vertical, small x-axis labels so long event names fit.
    axis.text.x = element_text(angle = 90, hjust = 0.5, size = 6),
    # The fill colour only distinguishes bars; a legend would be redundant.
    legend.position = "none"
  )
# Panel 2: total injuries per event type, bars ordered by ascending total.
# The original passed `size = n` to labs() as if it were a font size; labs()
# only treats it as the title of the (unused) size aesthetic, so those
# arguments are dropped. Rendered output is unchanged.
xx2 <- ggplot(
  most_injuries,
  aes(x = reorder(EVTYPE, TOTAL_INJURIES), y = TOTAL_INJURIES, fill = EVTYPE)
) +
  geom_col() + # bar height is the value itself
  # Comma-formatted value label sitting on top of each bar.
  geom_text(
    aes(label = comma(TOTAL_INJURIES)),
    vjust = 0, hjust = 0.5, size = 2.5
  ) +
  scale_y_continuous(labels = comma) + # thousands separators on the y axis
  scale_fill_brewer(palette = "Paired") +
  # Leave 10% headroom above the tallest bar so its label is not clipped.
  coord_cartesian(ylim = c(0, 1.1 * max(most_injuries$TOTAL_INJURIES))) +
  labs(
    x = "", # no x-axis title
    y = "Injuries",
    subtitle = "The 10 Events with Most Injuries"
  ) +
  theme_minimal() +
  theme(
    # Vertical, small x-axis labels so long event names fit.
    axis.text.x = element_text(angle = 90, hjust = 0.5, size = 6),
    # The fill colour only distinguishes bars; a legend would be redundant.
    legend.position = "none"
  )
# Figure 1: the two health panels side by side (align = "h" aligns the
# panels horizontally), with a source note added underneath the grid.
plot_grid(xx1, xx2, align = "h", axis = "b") %>%
  add_sub(
    "Source: National Weather Service.(TSTM) Means Marine Thunderstorm Wind.",
    size = 8,
    hjust = 0.85
  ) %>%
  ggdraw()
Shown below are the 10 weather events with the most property damage and the most crop damage in the period 1950–2011.
# Economic tables ----
# Event types whose property-damage rank is within 10 of the maximum rank.
# Totals are divided by 1000 and truncated to whole numbers (the plots label
# the result as millions of dollars), then listed from highest rank to lowest.
most_prop_dmg <- events %>%
  filter(max(RANK_PROP_DMG) - RANK_PROP_DMG < 10) %>%
  mutate(TOTAL_PROP_DMG = trunc(TOTAL_PROP_DMG / 1000)) %>%
  arrange(desc(RANK_PROP_DMG)) %>%
  select(TOTAL_PROP_DMG, EVTYPE)

# Same selection, scaling and ordering, based on the crop-damage rank.
most_crop_dmg <- events %>%
  filter(max(RANK_CROP_DMG) - RANK_CROP_DMG < 10) %>%
  mutate(TOTAL_CROP_DMG = trunc(TOTAL_CROP_DMG / 1000)) %>%
  arrange(desc(RANK_CROP_DMG)) %>%
  select(TOTAL_CROP_DMG, EVTYPE)
# Panel 1: total property damage per event type, bars ordered by ascending
# total. The original passed `size = n` to labs() as if it were a font size;
# labs() only treats it as the title of the (unused) size aesthetic, so those
# arguments are dropped. Rendered output is unchanged.
xx3 <- ggplot(
  most_prop_dmg,
  aes(x = reorder(EVTYPE, TOTAL_PROP_DMG), y = TOTAL_PROP_DMG, fill = EVTYPE)
) +
  geom_col() + # bar height is the value itself
  # Comma-formatted value label sitting on top of each bar.
  geom_text(
    aes(label = comma(TOTAL_PROP_DMG)),
    vjust = 0, hjust = 0.5, size = 2.5
  ) +
  scale_y_continuous(labels = comma) + # thousands separators on the y axis
  scale_fill_brewer(palette = "Paired") +
  # Leave 10% headroom above the tallest bar so its label is not clipped.
  coord_cartesian(ylim = c(0, 1.1 * max(most_prop_dmg$TOTAL_PROP_DMG))) +
  labs(
    x = "", # no x-axis title
    y = "Millions Dollars",
    title = "Most Harmful Events in Terms of Economic Consequences",
    subtitle = "The 10 Events with Most Property Damage"
  ) +
  theme_minimal() +
  theme(
    # Vertical, small x-axis labels so long event names fit.
    axis.text.x = element_text(angle = 90, hjust = 0.5, size = 6),
    # The fill colour only distinguishes bars; a legend would be redundant.
    legend.position = "none"
  )
# Panel 2: total crop damage per event type, bars ordered by ascending total.
# The original passed `size = n` to labs() as if it were a font size; labs()
# only treats it as the title of the (unused) size aesthetic, so those
# arguments are dropped. Rendered output is unchanged.
xx4 <- ggplot(
  most_crop_dmg,
  aes(x = reorder(EVTYPE, TOTAL_CROP_DMG), y = TOTAL_CROP_DMG, fill = EVTYPE)
) +
  geom_col() + # bar height is the value itself
  # Comma-formatted value label sitting on top of each bar.
  geom_text(
    aes(label = comma(TOTAL_CROP_DMG)),
    vjust = 0, hjust = 0.5, size = 2.5
  ) +
  scale_y_continuous(labels = comma) + # thousands separators on the y axis
  scale_fill_brewer(palette = "Paired") +
  # Leave 10% headroom above the tallest bar so its label is not clipped.
  coord_cartesian(ylim = c(0, 1.1 * max(most_crop_dmg$TOTAL_CROP_DMG))) +
  labs(
    x = "", # no x-axis title
    y = "Millions Dollars",
    subtitle = "The 10 Events with Most Crop Damage"
  ) +
  theme_minimal() +
  theme(
    # Vertical, small x-axis labels so long event names fit.
    axis.text.x = element_text(angle = 90, hjust = 0.5, size = 6),
    # The fill colour only distinguishes bars; a legend would be redundant.
    legend.position = "none"
  )
# Figure 2: the two economic panels side by side (align = "h" aligns the
# panels horizontally), with a source note added underneath the grid.
plot_grid(xx3, xx4, align = "h", axis = "b") %>%
  add_sub(
    "Source: National Weather Service.(TSTM) Means Marine Thunderstorm Wind.",
    size = 8,
    hjust = 0.85
  ) %>%
  ggdraw()