# Show the R source of every chunk in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
# Evaluate all chunks from the folder that holds the storm data, so that the
# relative file name passed to read.csv resolves.
# NOTE(review): this is an absolute, machine-specific path; the report will only
# knit on a machine that has the same folder layout.
knitr::opts_knit$set(root.dir = "C:/Users/luis-alaniz/Desktop/Reproducible Research/Week 4/repdata_2Fdata%2FStormData")
# Large penalty against scientific notation, so numbers print in fixed notation.
options(scipen = 999)

Synopsis.

The analysis describes the damage caused by catastrophic weather events in terms of their effect on population health and on property (measured in millions of dollars). The source of the data is the National Weather Service. The database spans the period from 1950 to 2011. To evaluate the effects of these events, the 10 most damaging event types were selected for every metric. They were separated into events that affect population health and events that destroy the population's property. The damage from the events was summed over the entire period. However, no adjustment was made for changes in the cost of living over time for damage measured in dollars, which means that the damage from early events in the database is likely underestimated. Five R packages were used to produce the analysis: one for data handling and four for graphics and colors. The results show that the most destructive events in terms of lives lost and injuries are tornadoes. In terms of property damage the most destructive events are tornadoes, and in terms of crop damage the most destructive event is hail.

Data Processing.

R-Packages used.

The following R-Packages were used:

  • dplyr for data handling.
  • ggplot2, cowplot, and scales for plotting, making panel plots, and setting number scales, respectively. The use of cowplot was necessary because, given the way the analysis was done, there was no way of making factors for certain events that are common to several metrics (fatalities and injuries, for example).
  • RColorBrewer for graphic colors.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(RColorBrewer)
library(cowplot)
## Warning: package 'cowplot' was built under R version 3.4.3
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
library(scales)

Setting up the data.

The root directory was set up using the knitr option root.dir, and the data was then loaded into R using the R command read.csv. The data was then stored in a table data frame (tibble) named “events”. This table data frame was grouped by event type, the damage from the events was summed within each group, and the totals were finally ranked.

# Dollar multiplier encoded by the PROPDMGEXP / CROPDMGEXP columns.
#
# "H", "K", "M" and "B" (either case) are read as hundreds, thousands, millions
# and billions. Every other code (blank, digits, "+", "-", "?") falls back to a
# multiplier of 1, i.e. the amount is taken as plain dollars.
# NOTE(review): the fallback for the undocumented codes is an assumption --
# confirm against the NWS Storm Data documentation.
#
# exp_code: character or factor vector of magnitude codes.
# Returns a numeric vector of the same length as exp_code.
damage_multiplier <- function(exp_code) {
        known <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
        multiplier <- unname(known[toupper(as.character(exp_code))])
        multiplier[is.na(multiplier)] <- 1
        multiplier
}

# Load the raw storm data and reduce it to one row per event type holding the
# period totals, their ranks, and only the event types that are in the top 10
# of at least one of the four metrics.
#
# The damage amounts are scaled by their magnitude code before summing; adding
# the raw PROPDMG / CROPDMG figures would count a $1 billion entry the same as a
# $1 thousand entry. Totals are kept in THOUSANDS of dollars so that dividing
# by 1000 later yields millions of dollars.
# NOTE(review): this changes the property/crop rankings compared with summing the
# raw figures, so the conclusions stated in the report text must be re-checked
# after re-knitting.
events <- read.csv("repdata%2Fdata%2FStormData") %>%
        as_tibble() %>%
        mutate(
                PROP_DMG_K = PROPDMG * damage_multiplier(PROPDMGEXP) / 1000,
                CROP_DMG_K = CROPDMG * damage_multiplier(CROPDMGEXP) / 1000
        ) %>%
        group_by(EVTYPE) %>%
        summarise(
                TOTAL_FATAL = sum(FATALITIES),
                TOTAL_INJURIES = sum(INJURIES),
                TOTAL_PROP_DMG = sum(PROP_DMG_K),
                TOTAL_CROP_DMG = sum(CROP_DMG_K)
        ) %>%
        # min_rank() gives the largest total the highest rank.
        mutate(
                RANK_TOTAL_FATAL = min_rank(TOTAL_FATAL),
                RANK_TOTAL_INJURIES = min_rank(TOTAL_INJURIES),
                RANK_PROP_DMG = min_rank(TOTAL_PROP_DMG),
                RANK_CROP_DMG = min_rank(TOTAL_CROP_DMG)
        ) %>%
        # Keep an event type if it is within the 10 highest ranks of any metric.
        filter(
                RANK_TOTAL_FATAL > max(RANK_TOTAL_FATAL) - 10 |
                        RANK_TOTAL_INJURIES > max(RANK_TOTAL_INJURIES) - 10 |
                        RANK_PROP_DMG > max(RANK_PROP_DMG) - 10 |
                        RANK_CROP_DMG > max(RANK_CROP_DMG) - 10
        )

Results.

Shown below are the 10 weather events with the most fatalities and the most injuries in the period 1950 - 2011.

# Population-health figure: two side-by-side bar charts of the 10 event types
# with the most fatalities and the most injuries.
# Expects `events` to hold one row per event type with TOTAL_* and RANK_* columns.

# Top-10 tables, highest total first.
most_fatal <- events %>%
        filter(RANK_TOTAL_FATAL > max(RANK_TOTAL_FATAL) - 10) %>%
        arrange(desc(RANK_TOTAL_FATAL)) %>%
        select(TOTAL_FATAL, EVTYPE)
most_injuries <- events %>%
        filter(RANK_TOTAL_INJURIES > max(RANK_TOTAL_INJURIES) - 10) %>%
        arrange(desc(RANK_TOTAL_INJURIES)) %>%
        select(TOTAL_INJURIES, EVTYPE)

# Panel 1: fatalities. Bars are ordered by total; this panel carries the
# figure title.
xx1 <- ggplot(most_fatal, aes(x = reorder(EVTYPE, TOTAL_FATAL), y = TOTAL_FATAL, fill = EVTYPE)) +
        geom_col() +
        geom_text(aes(label = comma(TOTAL_FATAL)), vjust = 0, hjust = 0.5, size = 2.5) + # value label on top of each bar
        scale_y_continuous(labels = comma) + # thousands separators on the y axis
        scale_fill_brewer(palette = "Paired") +
        coord_cartesian(ylim = c(0, 1.1 * max(most_fatal$TOTAL_FATAL))) + # 10% headroom for the tallest bar's label
        # Text sizes are set through theme()/geom_text(); labs() only takes label text.
        labs(
                title = "Most Harmful Events to Population Health",
                subtitle = "The 10 Events with Most Fatalities",
                x = "",
                y = "Fatalities"
        ) +
        theme_minimal() +
        theme(
                axis.text.x = element_text(angle = 90, hjust = 0.5, size = 6), # vertical, small event names
                legend.position = "none" # the x axis already names every bar
        )

# Panel 2: injuries. Same layout as panel 1, without a title.
xx2 <- ggplot(most_injuries, aes(x = reorder(EVTYPE, TOTAL_INJURIES), y = TOTAL_INJURIES, fill = EVTYPE)) +
        geom_col() +
        geom_text(aes(label = comma(TOTAL_INJURIES)), vjust = 0, hjust = 0.5, size = 2.5) + # value label on top of each bar
        scale_y_continuous(labels = comma) + # thousands separators on the y axis
        scale_fill_brewer(palette = "Paired") +
        coord_cartesian(ylim = c(0, 1.1 * max(most_injuries$TOTAL_INJURIES))) + # 10% headroom for the tallest bar's label
        labs(
                subtitle = "The 10 Events with Most Injuries",
                x = "",
                y = "Injuries"
        ) +
        theme_minimal() +
        theme(
                axis.text.x = element_text(angle = 90, hjust = 0.5, size = 6), # vertical, small event names
                legend.position = "none"
        )

# Put both panels in one row (align = "h" aligns them horizontally, sharing the
# bottom axis) and add the source note underneath. TSTM is the abbreviation for
# thunderstorm used in the event type names.
ggdraw(add_sub(plot_grid(xx1, xx2, align = "h", axis = "b"), "Source: National Weather Service. (TSTM) Means Thunderstorm.", size = 8, hjust = 0.85))

Shown below are the 10 weather events with the most property damage and the most crop damage in the period 1950 - 2011.

# Economic-damage figure: two side-by-side bar charts of the 10 event types
# with the most property damage and the most crop damage.
# Expects `events` to hold one row per event type with TOTAL_* and RANK_* columns.

# Top-10 tables, highest total first. Totals are divided by 1000 and truncated
# to whole units for display.
# NOTE(review): the axis label says millions of dollars, which presumes the
# totals in `events` are in thousands of dollars -- confirm.
most_prop_dmg <- events %>%
        filter(RANK_PROP_DMG > max(RANK_PROP_DMG) - 10) %>%
        mutate(TOTAL_PROP_DMG = trunc(TOTAL_PROP_DMG / 1000)) %>%
        arrange(desc(RANK_PROP_DMG)) %>%
        select(TOTAL_PROP_DMG, EVTYPE)
most_crop_dmg <- events %>%
        filter(RANK_CROP_DMG > max(RANK_CROP_DMG) - 10) %>%
        mutate(TOTAL_CROP_DMG = trunc(TOTAL_CROP_DMG / 1000)) %>%
        arrange(desc(RANK_CROP_DMG)) %>%
        select(TOTAL_CROP_DMG, EVTYPE)

# Panel 1: property damage. Bars are ordered by total; this panel carries the
# figure title.
xx3 <- ggplot(most_prop_dmg, aes(x = reorder(EVTYPE, TOTAL_PROP_DMG), y = TOTAL_PROP_DMG, fill = EVTYPE)) +
        geom_col() +
        geom_text(aes(label = comma(TOTAL_PROP_DMG)), vjust = 0, hjust = 0.5, size = 2.5) + # comma-formatted value label on top of each bar
        scale_y_continuous(labels = comma) + # thousands separators on the y axis
        scale_fill_brewer(palette = "Paired") +
        coord_cartesian(ylim = c(0, 1.1 * max(most_prop_dmg$TOTAL_PROP_DMG))) + # 10% headroom for the tallest bar's label
        # Text sizes are set through theme()/geom_text(); labs() only takes label text.
        labs(
                title = "Most Harmful Events in Terms of Economic Consequences",
                subtitle = "The 10 Events with Most Property Damage",
                x = "",
                y = "Millions Dollars"
        ) +
        theme_minimal() +
        theme(
                axis.text.x = element_text(angle = 90, hjust = 0.5, size = 6), # vertical, small event names
                legend.position = "none" # the x axis already names every bar
        )

# Panel 2: crop damage. Same layout as panel 1, without a title.
xx4 <- ggplot(most_crop_dmg, aes(x = reorder(EVTYPE, TOTAL_CROP_DMG), y = TOTAL_CROP_DMG, fill = EVTYPE)) +
        geom_col() +
        geom_text(aes(label = comma(TOTAL_CROP_DMG)), vjust = 0, hjust = 0.5, size = 2.5) + # comma-formatted value label on top of each bar
        scale_y_continuous(labels = comma) + # thousands separators on the y axis
        scale_fill_brewer(palette = "Paired") +
        coord_cartesian(ylim = c(0, 1.1 * max(most_crop_dmg$TOTAL_CROP_DMG))) + # 10% headroom for the tallest bar's label
        labs(
                subtitle = "The 10 Events with Most Crop Damage",
                x = "",
                y = "Millions Dollars"
        ) +
        theme_minimal() +
        theme(
                axis.text.x = element_text(angle = 90, hjust = 0.5, size = 6), # vertical, small event names
                legend.position = "none"
        )

# Put both panels in one row (align = "h" aligns them horizontally, sharing the
# bottom axis) and add the source note underneath. TSTM is the abbreviation for
# thunderstorm used in the event type names.
ggdraw(add_sub(plot_grid(xx3, xx4, align = "h", axis = "b"), "Source: National Weather Service. (TSTM) Means Thunderstorm.", size = 8, hjust = 0.85))