Synopsis
This report examines the effects of severe weather events, such as storms, on the population of the United States. These events can cause both public health and economic problems for communities and municipalities. To perform this work, we explore the US National Oceanic and Atmospheric Administration (NOAA) storm database. This database tracks the characteristics of major storms and weather events in the United States and covers the years 1950 to 2011; more recent years should be considered more complete. The information includes when and where these events occur, as well as estimates of any fatalities, injuries and property damage. This report aims to answer two questions: which types of events are most harmful to population health, and which types of events have the greatest economic consequences?
Loading and Processing the Raw Data
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(ggplot2)
## Loading required package: ggplot2
library(tidyr)
The dataset was downloaded from the address below and read directly from the compressed file.
# Source archive for the NOAA storm database (bzip2-compressed CSV).
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Same on-disk name as before, relative to the working directory, so an
# archive left by an earlier run is reused.
destFile <- "repdata%2Fdata%2FStormData.csv.bz2"
# Download only when no local copy exists. mode = "wb" keeps the binary
# archive intact on Windows, and leaving `method` at its default avoids
# depending on an external curl binary.
if (!file.exists(destFile)) {
  download.file(fileUrl, destfile = destFile, mode = "wb")
}
# read.csv() reads the .bz2 archive directly; no manual decompression step.
stormdata <- read.csv(file = destFile, header = TRUE, sep = ",")
# Report the number of rows and columns that were loaded.
dim(stormdata)
## [1] 902297 37
# Preview the first six records to check the column layout.
head(stormdata, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
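We will also convert the letter codes that give the units of the damage estimates into numeric multipliers: H = 100, K = 1,000, M = 1,000,000 and B = 1,000,000,000. Any other code, or a missing code, is treated as a multiplier of 1.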
# Translate damage-exponent codes into numeric multipliers.
#
# The substitutions run in a fixed order: first every character that is not
# one of H/K/M/B (either case) becomes "1", then H -> 100, K -> 1000,
# M -> 1000000 and B -> 1000000000. Empty codes are set to "1" before the
# result is converted to numeric.
exponent_to_multiplier <- function(codes) {
  patterns <- c("[^HhKkMmBb]", "[Hh]", "[Kk]", "[Mm]", "[Bb]")
  replacements <- c("1", "100", "1000", "1000000", "1000000000")
  for (i in seq_along(patterns)) {
    codes <- gsub(patterns[i], replacements[i], codes)
  }
  codes[codes == ""] <- "1"
  as.numeric(codes)
}

# Apply the same conversion to the property and crop damage exponents.
stormdata$PROPDMGEXP <- exponent_to_multiplier(stormdata$PROPDMGEXP)
stormdata$CROPDMGEXP <- exponent_to_multiplier(stormdata$CROPDMGEXP)
We select the variables related to population health (fatalities and injuries) and total them by event type.
library(dplyr, warn.conflicts = FALSE, quietly=TRUE)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Per event type: number of records, total fatalities, total injuries and
# their combined count (TOTINJ), sorted from most to least harmful.
health <- stormdata %>%
  group_by(EVTYPE) %>%
  summarise(
    EVENTS = n(),
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES)
  ) %>%
  mutate(TOTINJ = FATALITIES + INJURIES) %>%
  arrange(desc(TOTINJ))
We keep only the five events with the highest combined fatalities and injuries for better visualization.
# Keep the first five rows of the health table, which is sorted by TOTINJ.
Red_Health <- health[seq_len(5), ]
# Rebuild EVTYPE as a factor whose levels follow the row order, so the
# ranking is preserved rather than replaced by alphabetical order.
Red_Health$EVTYPE <- factor(
  as.character(Red_Health$EVTYPE),
  levels = unique(as.character(Red_Health$EVTYPE))
)
# Reshape to long form: one row per event type and health measure.
Red_Health <- melt(
  Red_Health,
  id.vars = c("EVTYPE", "EVENTS"),
  measure.vars = c("FATALITIES", "INJURIES")
)
Next we identify the events that have the greatest economic consequences, expressing property and crop damage in billions of US dollars.
# Per event type: number of records plus total property and crop damage.
# Each damage value is multiplied by its numeric multiplier and divided by
# 1e9 (billions) before summing; TOTDMG is the combined total used for
# ranking.
economy <- stormdata %>%
  group_by(EVTYPE) %>%
  summarise(
    EVENTS = n(),
    PROPDMG = sum(PROPDMG * PROPDMGEXP / 1e9),
    CROPDMG = sum(CROPDMG * CROPDMGEXP / 1e9)
  ) %>%
  mutate(TOTDMG = PROPDMG + CROPDMG) %>%
  arrange(desc(TOTDMG))
# Keep the first five rows of the economy table, which is sorted by TOTDMG.
Red_economy <- economy[seq_len(5), ]
# Rebuild EVTYPE as a factor whose levels follow the row order, so the
# ranking is preserved rather than replaced by alphabetical order.
Red_economy$EVTYPE <- factor(
  as.character(Red_economy$EVTYPE),
  levels = unique(as.character(Red_economy$EVTYPE))
)
# Reshape to long form: one row per event type and damage category.
Red_economy <- melt(
  Red_economy,
  id.vars = c("EVTYPE", "EVENTS"),
  measure.vars = c("PROPDMG", "CROPDMG")
)
Results
The graph below shows that tornadoes are the storm-related event most damaging to people’s health.
# Stacked bar chart of fatalities and injuries for the selected event types.
ggplot(Red_Health, aes(x = EVTYPE, y = value, fill = variable)) +
  geom_col() +
  labs(
    title = "Events with Largest Health Impact",
    x = "Event Type",
    y = "Injuries & Fatalities"
  ) +
  # Place the legend inside the plot area and drop its title.
  theme(legend.position = c(0.9, 0.8), legend.title = element_blank())
The chart below shows that floods and hurricanes are the events with the greatest economic consequences for the population.
# Stacked bar chart of property and crop damage for the selected event types.
ggplot(Red_economy, aes(x = EVTYPE, y = value, fill = variable)) +
  geom_col() +
  labs(
    title = "Events with Largest Economic Impact",
    x = "Event Type",
    y = "Total Cost ($Billion USD)"
  ) +
  # Place the legend inside the plot area and drop its title.
  theme(legend.position = c(0.9, 0.8), legend.title = element_blank()) +
  # Replace the raw column names in the legend with readable labels.
  scale_fill_discrete(labels = c("Property Damage", "Crop Damage"))