Severe weather conditions are a known cause of public concern and economic loss.
The aim of this project is to explore the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which tracks several characteristics of weather events in the United States, including their location, date, and their consequences, such as personal injuries and property damage.
As part of this report we will address the following questions:
Across the United States, which types of events have the greatest economic consequences? After aggregating the data by event type, we can see the following outcome:
In terms of economic consequences, floods have the highest impact.
The following code loads the required libraries and reads the dataset:
#Read and initialize library and data
library(plyr)
## Warning: package 'plyr' was built under R version 3.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.3
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.4.3
library(grid)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Locate and load the storm data set.
# NOTE(review): the working directory is machine-specific; it is kept here so
# that the file is looked up in the same place as before.
setwd("U:/EA/Course 5/Week 4")
# Prefer an already-extracted CSV. Otherwise read the bzip2 archive directly:
# unzip() only understands zip archives and cannot extract a .bz2 file, while
# read.csv() decompresses bzip2 input transparently.
stormFile <- "repdata%2Fdata%2FStormData.csv"
if (!file.exists(stormFile)) {
  stormFile <- paste0(stormFile, ".bz2")
}
# Loading the file into a data frame
stormData <- read.csv(stormFile, sep = ",", header = TRUE)
# Keep only the event type plus the casualty and damage columns
stormDataRed <- stormData[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
colnames(stormDataRed)
## [1] "EVTYPE" "FATALITIES" "INJURIES" "PROPDMG" "PROPDMGEXP"
## [6] "CROPDMG" "CROPDMGEXP"
# Total fatalities and injuries per event type
harm2health <- ddply(
  stormDataRed, .(EVTYPE), summarize,
  fatalities = sum(FATALITIES),
  injuries = sum(INJURIES)
)
# Two rankings of the same table, highest totals first
injury <- harm2health[order(harm2health$injuries, decreasing = TRUE), ]
fatal <- harm2health[order(harm2health$fatalities, decreasing = TRUE), ]
# Summarize economic damage
# The values in PROPDMG and CROPDMG cannot be used directly: each must be
# scaled by the power of ten encoded in PROPDMGEXP / CROPDMGEXP, where
# h = hundred, k = thousand, m = million, b = billion. They need to be preprocessed
# Translate a single damage-exponent code into a power of ten.
#
# e: one exponent code, given as a character string, a number, or a
#    length-one factor.
# Returns 2, 3, 6 or 9 for h, k, m or b (either case), 0 for the placeholder
# codes "", "-", "?" and "+", and the numeric value itself for codes that
# parse as a number (e.g. "5" gives 5).
# Stops with "Invalid value." for anything else, including NA.
getExp <- function(e) {
  # Work on the label: as.numeric() applied to a factor returns the internal
  # level index, not the number written in the label.
  e <- as.character(e)
  if (e %in% c("h", "H")) {
    return(2)
  }
  if (e %in% c("k", "K")) {
    return(3)
  }
  if (e %in% c("m", "M")) {
    return(6)
  }
  if (e %in% c("b", "B")) {
    return(9)
  }
  # Placeholders are handled before the numeric conversion so that they never
  # trigger a coercion warning.
  if (e %in% c("", "-", "?", "+")) {
    return(0)
  }
  # Anything non-numeric becomes NA here; the coercion warning is suppressed
  # because that case is reported by stop() below.
  exponent <- suppressWarnings(as.numeric(e))
  if (!is.na(exponent)) {
    return(exponent)
  }
  stop("Invalid value.")
}
# Compute absolute property and crop damage: amount * 10^exponent.
# The exponent columns are converted to character first so that getExp() sees
# the written code even when read.csv() produced factors, and vapply() is used
# so the result is always a plain numeric vector (also for zero rows).
propExp <- vapply(
  as.character(stormDataRed$PROPDMGEXP), getExp, numeric(1),
  USE.NAMES = FALSE
)
stormDataRed$propDamage <- stormDataRed$PROPDMG * 10^propExp
cropExp <- vapply(
  as.character(stormDataRed$CROPDMGEXP), getExp, numeric(1),
  USE.NAMES = FALSE
)
stormDataRed$cropDamage <- stormDataRed$CROPDMG * 10^cropExp
# Total property and crop damage per event type
econDamage <- ddply(
  stormDataRed, .(EVTYPE), summarize,
  propDamage = sum(propDamage),
  cropDamage = sum(cropDamage)
)
# Drop event types that caused neither property nor crop damage
econDamage <- econDamage[
  econDamage$propDamage > 0 | econDamage$cropDamage > 0,
]
# Rank the remaining event types, largest damage first
cropDmgSorted <- econDamage[order(econDamage$cropDamage, decreasing = TRUE), ]
propDmgSorted <- econDamage[order(econDamage$propDamage, decreasing = TRUE), ]
# Horizontal bar charts of the ten event types with the most injuries and
# the most fatalities, stacked in one figure (fatalities on top).
injuriesPlot <- ggplot(
  data = head(injury, 10),
  aes(x = reorder(EVTYPE, injuries), y = injuries)
) +
  geom_bar(fill = "cornflowerblue", stat = "identity") +
  coord_flip() +
  labs(
    x = "Event type",
    y = "Total number of injuries",
    title = "Health impact of weather events in the US - Top 10"
  ) +
  theme(legend.position = "none")
fatalitiesPlot <- ggplot(
  data = head(fatal, 10),
  aes(x = reorder(EVTYPE, fatalities), y = fatalities)
) +
  geom_bar(fill = "salmon", stat = "identity") +
  coord_flip() +
  labs(x = "Event type", y = "Total number of fatalities") +
  theme(legend.position = "none")
grid.arrange(fatalitiesPlot, injuriesPlot, nrow = 2)
## 2. Across the United States, which types of events have the greatest economic consequences?
# Horizontal bar charts of the ten event types with the highest property
# damage (shown on a log10 scale) and the highest crop damage (shown in raw
# dollars), stacked in one figure.
PropDamagePlot <- ggplot(
  data = head(propDmgSorted, 10),
  aes(
    x = reorder(EVTYPE, propDamage),
    y = log10(propDamage),
    fill = propDamage
  )
) +
  geom_bar(fill = "hotpink2", stat = "identity") +
  coord_flip() +
  labs(
    x = "Event type",
    y = "Property damage in dollars (log10)",
    title = "Economic impact of weather events in the US - Top 10"
  ) +
  theme(plot.title = element_text(hjust = 0))
CropdamagePlot <- ggplot(
  data = head(cropDmgSorted, 10),
  aes(x = reorder(EVTYPE, cropDamage), y = cropDamage, fill = cropDamage)
) +
  geom_bar(fill = "royalblue", stat = "identity") +
  coord_flip() +
  labs(x = "Event type", y = "Crop damage in dollars") +
  theme(legend.position = "none")
grid.arrange(PropDamagePlot, CropdamagePlot, ncol = 1, nrow = 2)