Synopsis

In this exercise, we review storm data from the National Weather Service and the National Climatic Data Center. The dataset records weather event types along with the fatalities, injuries, and the cost of property damage and crop damage associated with them. This document begins with the processing and transformation of the data for analysis. The results section then plots the results of the data analysis. There are 3 figures included in this document, which address the question of which types of events are most harmful to population health and which types of events have the greatest economic consequences.

Data Processing

Get and clean dataset from URL. Store dataset in a tibble using tidyverse. Review dataset and make a tidy dataset. Prep tidy dataset for data analysis

If needed, install library Packages ‘tidyverse’ and ‘gridExtra’

# Install whichever of the required packages are not yet present locally.
list.of.packages <- c("tidyverse", "gridExtra")
already_installed <- list.of.packages %in% installed.packages()[, "Package"]
new.packages <- list.of.packages[!already_installed]
if (length(new.packages)) {
  install.packages(new.packages)
}

Load the ‘tidyverse’ library

library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.6
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts ---------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Download the file and put the file in the data folder

# Fetch the compressed storm data once; later runs reuse the local copy
# instead of downloading the archive again.
if (!file.exists("./data")) {
  dir.create("./data")
}
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists("./data/StormData.csv")) {
  # The payload is a bzip2 archive, so request a binary transfer explicitly
  # (a text-mode transfer can corrupt binary files on Windows).
  # The destination keeps its .csv name because the read step refers to it.
  download.file(fileUrl, destfile = "./data/StormData.csv", mode = "wb")
}

Read the storm data CSV file from the data folder into a data frame. Be patient; loading the dataset can take a few minutes.

# Build the path to the downloaded file, then load it into a data frame.
path_rf <- file.path("./data", "StormData.csv")
stormData <- read.csv(file = path_rf)

Create storm data tibble

# Convert the raw data frame to a tibble. as_tibble() is the supported
# spelling; as.tibble() is deprecated in the tibble package.
stormData_tibble <- as_tibble(stormData)

Re-create new tibble for specific variables

# Keep only the seven variables used in the analysis, renaming all but
# EVTYPE to more readable column names.
stormData_tibble <- stormData_tibble %>%
  select(
    EVTYPE,
    Fatalities = FATALITIES,
    Injuries = INJURIES,
    Property_Damage = PROPDMG,
    Property_DamageExp = PROPDMGEXP,
    Crop_Damage = CROPDMG,
    Crop_DamageExp = CROPDMGEXP
  )

head(stormData_tibble)
## # A tibble: 6 x 7
##   EVTYPE  Fatalities Injuries Property_Damage Property_Damage~ Crop_Damage
##   <fct>        <dbl>    <dbl>           <dbl> <fct>                  <dbl>
## 1 TORNADO          0       15            25   K                          0
## 2 TORNADO          0        0             2.5 K                          0
## 3 TORNADO          0        2            25   K                          0
## 4 TORNADO          0        2             2.5 K                          0
## 5 TORNADO          0        2             2.5 K                          0
## 6 TORNADO          0        6             2.5 K                          0
## # ... with 1 more variable: Crop_DamageExp <fct>

Copy the storm data tibble into a new object named USD, then convert the damage figures into US dollars

# Work on a copy named USD; the dollar-converted damage columns are added to it.
USD <- stormData_tibble

Convert Property Damage into USD

# Convert Property_Damage to dollars using the exponent code stored in
# Property_DamageExp: H = 10^2, K = 10^3, M = 10^6, B = 10^9.
# Codes are compared case-insensitively so that lower-case entries such as
# "k" or "m", if present, are no longer silently valued at 0. Any code that
# is not one of the four letters still produces 0, exactly as before.
damage_multiplier <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
property_code <- toupper(as.character(USD$Property_DamageExp))
# Looking up an unrecognised name in the named vector yields NA.
property_scale <- unname(damage_multiplier[property_code])
USD$Property_USD <- ifelse(
  is.na(property_scale), 0, USD$Property_Damage * property_scale
)

Convert Crop Damage into USD

# Convert Crop_Damage to dollars using the exponent code stored in
# Crop_DamageExp: H = 10^2, K = 10^3, M = 10^6, B = 10^9.
# Codes are compared case-insensitively so that lower-case entries such as
# "k" or "m", if present, are no longer silently valued at 0. Any code that
# is not one of the four letters still produces 0, exactly as before.
damage_multiplier <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
crop_code <- toupper(as.character(USD$Crop_DamageExp))
# Looking up an unrecognised name in the named vector yields NA.
crop_scale <- unname(damage_multiplier[crop_code])
USD$Crop_USD <- ifelse(
  is.na(crop_scale), 0, USD$Crop_Damage * crop_scale
)

head(USD)
## # A tibble: 6 x 9
##   EVTYPE  Fatalities Injuries Property_Damage Property_Damage~ Crop_Damage
##   <fct>        <dbl>    <dbl>           <dbl> <fct>                  <dbl>
## 1 TORNADO          0       15            25   K                          0
## 2 TORNADO          0        0             2.5 K                          0
## 3 TORNADO          0        2            25   K                          0
## 4 TORNADO          0        2             2.5 K                          0
## 5 TORNADO          0        2             2.5 K                          0
## 6 TORNADO          0        6             2.5 K                          0
## # ... with 3 more variables: Crop_DamageExp <fct>, Property_USD <dbl>,
## #   Crop_USD <dbl>

Aggregate event types by fatalities and injuries then store in objects

# Total fatalities and total injuries for each event type.
fatalities <- aggregate(Fatalities ~ EVTYPE, data = USD, FUN = sum)
injuries <- aggregate(Injuries ~ EVTYPE, data = USD, FUN = sum)

Results

Sort the fatalities

# Order event types from most to fewest fatalities, then fix that order
# into the factor levels of EVTYPE.
fatalities_rank <- order(-fatalities[["Fatalities"]])
fatalities <- fatalities[fatalities_rank, ]
fatalities[["EVTYPE"]] <- factor(
  fatalities[["EVTYPE"]],
  levels = fatalities[["EVTYPE"]]
)
head(fatalities)
##             EVTYPE Fatalities
## 834        TORNADO       5633
## 130 EXCESSIVE HEAT       1903
## 153    FLASH FLOOD        978
## 275           HEAT        937
## 464      LIGHTNING        816
## 856      TSTM WIND        504

Sort the injuries

# Order event types from most to fewest injuries, then fix that order
# into the factor levels of EVTYPE.
injuries_rank <- order(-injuries[["Injuries"]])
injuries <- injuries[injuries_rank, ]
injuries[["EVTYPE"]] <- factor(
  injuries[["EVTYPE"]],
  levels = injuries[["EVTYPE"]]
)
head(injuries)
##             EVTYPE Injuries
## 834        TORNADO    91346
## 856      TSTM WIND     6957
## 170          FLOOD     6789
## 130 EXCESSIVE HEAT     6525
## 464      LIGHTNING     5230
## 275           HEAT     2100

Plot fatalities based on the most severe weather

library(ggplot2)

# Horizontal bar chart of the ten event types with the most fatalities,
# with bars ordered and shaded by the fatality count.
top_fatalities <- fatalities[1:10, ]
fatalitesPlot <- ggplot(
  data = top_fatalities,
  mapping = aes(
    x = reorder(EVTYPE, Fatalities),
    y = Fatalities,
    fill = Fatalities
  )
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    x = "Event type",
    y = "Severe Weather Events with the Highest Total of Fatalities"
  ) +
  theme(legend.position = "none")

Plot injuries based on the most severe weather

# Horizontal bar chart of the ten event types with the most injuries,
# with bars ordered and shaded by the injury count.
top_injuries <- injuries[1:10, ]
injuriesPlot <- ggplot(
  data = top_injuries,
  mapping = aes(
    x = reorder(EVTYPE, Injuries),
    y = Injuries,
    fill = Injuries
  )
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    x = "Event type",
    y = "Severe Weather Events with the Highest Total of Injuries"
  ) +
  theme(legend.position = "none")

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Stack the fatalities and injuries charts in a single figure.
# Current gridExtra releases take the figure title through `top`; `main` is
# not one of their arguments, so the title was previously not drawn. The
# plots are now passed once, positionally, instead of listing fatalitesPlot
# twice alongside a separate `grobs` list.
grid.arrange(
  fatalitesPlot, injuriesPlot,
  nrow = 2,
  top = "Top 10 harmful weather events in the US (1950-2011)"
)

Aggregate the economic impact of property and crop damage, store in object

# Sum property plus crop damage (in dollars) per event type and give the
# two resulting columns descriptive names.
ecoDamage <- setNames(
  aggregate(Property_USD + Crop_USD ~ EVTYPE, data = USD, FUN = sum),
  c("EventType", "TotalDamage")
)

Sort economic damage

# Order event types from highest to lowest total damage, then fix that
# order into the factor levels of EventType.
damage_rank <- order(-ecoDamage[["TotalDamage"]])
ecoDamage <- ecoDamage[damage_rank, ]
ecoDamage[["EventType"]] <- factor(
  ecoDamage[["EventType"]],
  levels = ecoDamage[["EventType"]]
)

head(ecoDamage)
##             EventType  TotalDamage
## 170             FLOOD 150319678250
## 411 HURRICANE/TYPHOON  71913712800
## 834           TORNADO  57340613590
## 670       STORM SURGE  43323541000
## 244              HAIL  18752904670
## 153       FLASH FLOOD  17562128610

Plot economic damage

# Horizontal bar chart of the ten event types with the highest combined
# property and crop damage, with bars ordered and shaded by total damage.
top_damage <- ecoDamage[1:10, ]
ecoDamagePlot <- ggplot(
  data = top_damage,
  mapping = aes(
    x = reorder(EventType, TotalDamage),
    y = TotalDamage,
    fill = TotalDamage
  )
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    x = "Event type",
    y = "Highest Economic Damage in (Property & Crops) Dollars"
  ) +
  theme(legend.position = "none")

ecoDamagePlot

Summary

The first plot, “Severe Weather Events with the Highest Total of Fatalities,” indicates that tornadoes are the severe weather event that caused the most fatalities; they also caused the most injuries, as shown in the “Severe Weather Events with the Highest Total of Injuries” plot. The last plot, “Highest Economic Damage in (Property & Crops) Dollars,” indicates that floods cause the most economic damage, with hurricanes/typhoons coming in second and tornadoes in third place.