In this exercise, we review storm data from the National Weather Service and the National Climatic Data Center. The dataset contains information about various weather event types, fatalities, injuries, and the costs associated with property damage and crop damage. This document begins with processing and transforming the data for analysis. In the results section, the results of the data analysis are plotted. There are 3 figures included in this document, which address the questions of which types of events are most harmful to population health and which types of events have the greatest economic consequences.
Get and clean dataset from URL. Store dataset in a tibble using tidyverse. Review dataset and make a tidy dataset. Prep tidy dataset for data analysis
If needed, install library Packages ‘tidyverse’ and ‘gridExtra’
# Work out which of the required packages are absent from the local library
# and install only those.
list.of.packages <- c("tidyverse", "gridExtra")
new.packages <- setdiff(list.of.packages, installed.packages()[, "Package"])
if (length(new.packages) > 0) {
  install.packages(new.packages)
}
Load the ‘tidyverse’ library
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.6
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ---------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Download the file and put the file in the data folder
# Fetch the storm data archive into ./data, creating the folder on demand.
if (!dir.exists("./data")) {
  dir.create("./data")
}
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# The destination keeps its original ".csv" name because the reading step
# below refers to this exact path.
destFile <- "./data/StormData.csv"
# Skip the transfer when a copy is already on disk so that re-running the
# analysis does not download the archive again.
if (!file.exists(destFile)) {
  # The payload is a bzip2-compressed (binary) file, so request a binary
  # write explicitly instead of relying on platform defaults.
  download.file(fileUrl, destfile = destFile, mode = "wb")
}
Read the storm data CSV file from the data folder and store it in a data frame. Be patient; loading the dataset could take a few minutes.
# Assemble the location of the downloaded file, then parse it into a
# data frame.
path_rf <- file.path("./data", "StormData.csv")
stormData <- read.csv(file = path_rf)
Create storm data tibble
# Convert the raw data frame to a tibble. `as_tibble()` is the supported
# spelling; the dotted `as.tibble()` alias is deprecated.
stormData_tibble <- as_tibble(stormData)
Re-create new tibble for specific variables
# Keep only the seven variables used by the analysis, giving each a
# readable name, then preview the first rows.
stormData_tibble <- with(
  stormData_tibble,
  tibble(
    "EVTYPE" = EVTYPE,
    "Fatalities" = FATALITIES,
    "Injuries" = INJURIES,
    "Property_Damage" = PROPDMG,
    "Property_DamageExp" = PROPDMGEXP,
    "Crop_Damage" = CROPDMG,
    "Crop_DamageExp" = CROPDMGEXP
  )
)
head(stormData_tibble)
## # A tibble: 6 x 7
## EVTYPE Fatalities Injuries Property_Damage Property_Damage~ Crop_Damage
## <fct> <dbl> <dbl> <dbl> <fct> <dbl>
## 1 TORNADO 0 15 25 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
## # ... with 1 more variable: Crop_DamageExp <fct>
Copy the storm data tibble to a new object, USD, in which the damage amounts will be converted into US dollars
# Start the dollar-conversion work from the trimmed storm tibble under a new name.
USD <- stormData_tibble
Convert Property Damage into USD
# Convert property damage (amount + magnitude code) into absolute US dollars.
# Recognised codes: H = hundreds, K = thousands, M = millions, B = billions.
# Codes are matched case-insensitively so that a lower-case code (e.g. "k" or
# "m") is not silently treated as zero damage. Any other code, an empty code,
# or a missing code leaves the dollar value at 0.
damage_multiplier <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
property_code <- toupper(as.character(USD$Property_DamageExp))
property_scale <- unname(
  damage_multiplier[match(property_code, names(damage_multiplier))]
)
property_known <- !is.na(property_scale)
USD$Property_USD <- 0
USD$Property_USD[property_known] <-
  USD$Property_Damage[property_known] * property_scale[property_known]
Convert Crop Damage into USD
# Convert crop damage (amount + magnitude code) into absolute US dollars.
# Recognised codes: H = hundreds, K = thousands, M = millions, B = billions.
# Codes are matched case-insensitively so that a lower-case code (e.g. "k" or
# "m") is not silently treated as zero damage. Any other code, an empty code,
# or a missing code leaves the dollar value at 0.
damage_multiplier <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
crop_code <- toupper(as.character(USD$Crop_DamageExp))
crop_scale <- unname(
  damage_multiplier[match(crop_code, names(damage_multiplier))]
)
crop_known <- !is.na(crop_scale)
USD$Crop_USD <- 0
USD$Crop_USD[crop_known] <-
  USD$Crop_Damage[crop_known] * crop_scale[crop_known]
head(USD)
## # A tibble: 6 x 9
## EVTYPE Fatalities Injuries Property_Damage Property_Damage~ Crop_Damage
## <fct> <dbl> <dbl> <dbl> <fct> <dbl>
## 1 TORNADO 0 15 25 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
## # ... with 3 more variables: Crop_DamageExp <fct>, Property_USD <dbl>,
## # Crop_USD <dbl>
Aggregate fatalities and injuries by event type, then store the results in objects
# Total the fatalities and the injuries recorded for each event type.
fatalities <- aggregate(
  formula(Fatalities ~ EVTYPE),
  data = USD,
  FUN = sum
)
injuries <- aggregate(
  formula(Injuries ~ EVTYPE),
  data = USD,
  FUN = sum
)
Sort the fatalities
# Rank event types from most to fewest fatalities, and fix the factor levels
# in that ranked order so later plots keep the ranking.
fatalities <- fatalities[with(fatalities, order(-Fatalities)), ]
fatalities$EVTYPE <- with(fatalities, factor(EVTYPE, levels = EVTYPE))
head(fatalities)
## EVTYPE Fatalities
## 834 TORNADO 5633
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## 856 TSTM WIND 504
Sort the injuries
# Rank event types from most to fewest injuries, and fix the factor levels
# in that ranked order so later plots keep the ranking.
injuries <- injuries[with(injuries, order(-Injuries)), ]
injuries$EVTYPE <- with(injuries, factor(EVTYPE, levels = EVTYPE))
head(injuries)
## EVTYPE Injuries
## 834 TORNADO 91346
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
## 275 HEAT 2100
Plot fatalities for the most severe weather events
library(ggplot2)
# Horizontal bar chart of the ten event types with the most fatalities.
# The axes are flipped, so the "y" label appears along the horizontal axis.
fatalitesPlot <- ggplot(
  data = fatalities[seq_len(10), ],
  mapping = aes(
    x = reorder(EVTYPE, Fatalities),
    y = Fatalities,
    fill = Fatalities
  )
) +
  geom_bar(stat = "identity") +
  xlab("Event type") +
  ylab("Severe Weather Events with the Highest Total of Fatalities") +
  theme(legend.position = "none") +
  coord_flip()
Plot injuries based on the most severe weather
# Horizontal bar chart of the ten event types with the most injuries.
# The axes are flipped, so the "y" label appears along the horizontal axis.
injuriesPlot <- ggplot(
  data = injuries[seq_len(10), ],
  mapping = aes(
    x = reorder(EVTYPE, Injuries),
    y = Injuries,
    fill = Injuries
  )
) +
  geom_bar(stat = "identity") +
  xlab("Event type") +
  ylab("Severe Weather Events with the Highest Total of Injuries") +
  theme(legend.position = "none") +
  coord_flip()
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Stack the fatalities and injuries charts in one figure under a shared title.
# Each plot is passed exactly once, and the title goes through `top`, which is
# gridExtra's title argument (`main` is not a recognised argument there, so
# the caption supplied through it was not drawn).
grid.arrange(
  fatalitesPlot,
  injuriesPlot,
  nrow = 2,
  top = "Top 10 harmful weather events in the US (1950-2011)"
)
Aggregate the economic impact of property and crop damage, store in object
# Sum property and crop damage (in dollars) per event type, and give the two
# result columns descriptive names.
ecoDamage <- setNames(
  aggregate(Property_USD + Crop_USD ~ EVTYPE, data = USD, FUN = sum),
  c("EventType", "TotalDamage")
)
Sort economic damage
# Rank event types from highest to lowest total damage, and fix the factor
# levels in that ranked order so the plot keeps the ranking.
ecoDamage <- ecoDamage[with(ecoDamage, order(-TotalDamage)), ]
ecoDamage$EventType <- with(ecoDamage, factor(EventType, levels = EventType))
head(ecoDamage)
## EventType TotalDamage
## 170 FLOOD 150319678250
## 411 HURRICANE/TYPHOON 71913712800
## 834 TORNADO 57340613590
## 670 STORM SURGE 43323541000
## 244 HAIL 18752904670
## 153 FLASH FLOOD 17562128610
Plot economic damage
# Horizontal bar chart of the ten event types with the largest combined
# property and crop damage; the plot is printed once it has been built.
ecoDamagePlot <- ggplot(
  data = ecoDamage[seq_len(10), ],
  mapping = aes(
    x = reorder(EventType, TotalDamage),
    y = TotalDamage,
    fill = TotalDamage
  )
) +
  geom_bar(stat = "identity") +
  ylab("Highest Economic Damage in (Property & Crops) Dollars") +
  xlab("Event type") +
  theme(legend.position = "none") +
  coord_flip()
ecoDamagePlot
The analysis from the first plot, “Severe Weather Events with the Highest Total of Fatalities,” indicates that tornadoes are the severe weather event that caused the most fatalities, as well as the most injuries, as indicated in the “Severe Weather Events with the Highest Total of Injuries” plot. The last plot, “Highest Economic Damage in (Property & Crops) Dollars,” indicates that floods cause the most economic damage, with hurricanes/typhoons coming in second and tornadoes coming in third place.