Title: Effects of Weather Events on Human Life and Economic Factors
The purpose of this analysis is to examine the effects of different Weather events on Human Life and the Economy within the US. The information provided in the database is from 1950 to 2011. This analysis takes into account all years. For the purposes of Human Life effects, both fatalities and injuries were accounted for; in addition, a total incident metric was created based upon the sum of the two former factors. With relation to the economy, the damage done to properties and crops across the US is used.
For this section, we are going to read in the data.
The data comes from (https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2) as of June 17, 2015.
The following downloads, unzips, and loads the data into R.
library(dplyr, quietly=TRUE)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(curl, quietly=TRUE)
library(qdap, quietly=TRUE)
##
## Attaching package: 'qdapRegex'
##
## The following objects are masked from 'package:dplyr':
##
## escape, explain
##
##
## Attaching package: 'qdapTools'
##
## The following object is masked from 'package:dplyr':
##
## id
##
##
## Attaching package: 'qdap'
##
## The following object is masked from 'package:dplyr':
##
## %>%
##
## The following object is masked from 'package:base':
##
## Filter
#download and unzip data
# Fetch the compressed storm data only when no local copy exists, then read
# it through a bzfile connection (read.csv decompresses while reading).
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destfile <- "Data.csv.bz2"
if (!file.exists(destfile)) {
  # mode = "wb" writes the archive as binary so it is not corrupted on Windows
  download.file(url, destfile, mode = "wb")
}
data <- read.csv(bzfile(destfile))
datadf <- tbl_df(data)
The following is the processing of the data for analysis and results section. Processing will include:
## Keep only the columns used for the casualty and damage analysis
Grouped <- select(datadf, EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
##Property Damage and Crop Damage Manipulation
# Recode a damage-exponent column into the power of ten it stands for:
# leading K/k -> 3, M/m -> 6, B/b -> 9; punctuation and whitespace -> 1.
# Any other value is returned unchanged.
# POSIX classes only work inside a bracket expression ("[[:punct:]]"); the
# bare form "[:punct:]" is itself a bracket expression that matches the
# literal characters ':', 'p', 'u', 'n', 'c', 't'.
recode_dmg_exp <- function(exp_code) {
  exp_code <- gsub("^K|^k", 3, exp_code)
  exp_code <- gsub("^M|^m", 6, exp_code)
  exp_code <- gsub("^B|^b", 9, exp_code)
  exp_code <- gsub("[[:punct:]]", 1, exp_code)
  gsub("[[:space:]]", 1, exp_code)
}
Grouped$PROPDMGEXP <- recode_dmg_exp(Grouped$PROPDMGEXP)
Grouped$CROPDMGEXP <- recode_dmg_exp(Grouped$CROPDMGEXP)
#create damage values; in Millions of US Dollars
# Each damage figure is scaled by 10^exponent exactly once (prop_usd,
# crop_usd). Rows whose exponent cannot be coerced to a number yield NA and
# are counted as zero damage. The helper columns are dropped by the final
# select().
Grouped1 <- Grouped %>%
  mutate(
    totalcasualties = FATALITIES + INJURIES,
    prop_usd = PROPDMG * 10^as.numeric(PROPDMGEXP),
    crop_usd = CROPDMG * 10^as.numeric(CROPDMGEXP),
    prop = ifelse(is.na(prop_usd), 0, prop_usd) / 10^6,
    crop = ifelse(is.na(crop_usd), 0, crop_usd) / 10^6
  ) %>%
  select(EVTYPE, FATALITIES, INJURIES, totalcasualties, prop, crop)
## Warning in ifelse(is.na(c(25, 2.5, 25, 2.5, 2.5, 2.5, 2.5, 2.5, 25, 25, :
## NAs introduced by coercion
## Warning in ifelse(is.na(c(25, 2.5, 25, 2.5, 2.5, 2.5, 2.5, 2.5, 25, 25, :
## NAs introduced by coercion
## Warning in ifelse(is.na(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, :
## NAs introduced by coercion
## Warning in ifelse(is.na(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, :
## NAs introduced by coercion
## Sanity check: count the NA cells left in Grouped1. is.na() flags each
## cell and sum() totals the TRUE values, so 0 means no NA remains.
sum(is.na(Grouped1))
## [1] 0
# Group the per-record table by event type
Grouped2 <- Grouped1 %>% group_by(EVTYPE)
# Casualty totals per event type: fatalities, injuries and their sum
Grouped3 <- ungroup(
  summarise(
    Grouped2,
    fatalsum = sum(FATALITIES),
    injsum = sum(INJURIES),
    totsum = sum(totalcasualties)
  )
)
##event type manipulation
# Upper-case the labels, then trim both ends and collapse internal runs of
# whitespace so spelling variants of the same label compare equal.
Grouped3$EVTYPE <- toupper(Grouped3$EVTYPE)
Grouped3$EVTYPE <- gsub("\\s+", " ", gsub("^\\s+|\\s+$", "", Grouped3$EVTYPE))
# Consolidation rules: name = consolidated label, value = regex that selects
# the labels to fold into it. Rules run in order and each one sees the result
# of the previous ones. Regex has no AND/NOT operator, so "flood but not
# beach/coast" is expressed by running the COAST rule before the FLOOD rule:
# a label already rewritten to "COAST" no longer matches "FLASH|FLOOD".
evtype_rules <- c(
  COAST = "BEACH|COAST",
  FLOOD = "FLASH|FLOOD",
  THUNDERSTORM = "THUNDER|TSTM",
  WIND = "WIND",
  SNOW = "SNOW|ICE",
  HEAT = "HEAT|RECORD HIGH",
  RAIN = "RAIN"
)
for (label in names(evtype_rules)) {
  # Every label matching the rule's pattern is replaced by the rule's name
  Grouped3$EVTYPE[grepl(evtype_rules[[label]], Grouped3$EVTYPE)] <- label
}
#resum
# Collapse rows that share an EVTYPE label into a single row of totals
Grouped4 <- ungroup(
  summarise(
    group_by(Grouped3, EVTYPE),
    fatalsum = sum(fatalsum),
    injsum = sum(injsum),
    totsum = sum(totsum)
  )
)
#prep for plots on incidents; top 10 only per fatalsum, injsum, totsum
# Each table holds the ten largest event types for one measure, in the common
# layout (EVTYPE, category, sum) so the three can be stacked.
fatalorder <- Grouped4 %>%
  arrange(desc(fatalsum)) %>%
  head(n = 10) %>%
  mutate(category = "fatal") %>%
  select(EVTYPE, category, sum = fatalsum)
injorder <- Grouped4 %>%
  arrange(desc(injsum)) %>%
  head(n = 10) %>%
  mutate(category = "injury") %>%
  select(EVTYPE, category, sum = injsum)
totorder <- Grouped4 %>%
  arrange(desc(totsum)) %>%
  head(n = 10) %>%
  mutate(category = "total") %>%
  select(EVTYPE, category, sum = totsum)
# Long-format table: one row per (event type, category)
stacked <- rbind(fatalorder, injorder, totorder)
#prep for plots on monetary damage; top 10 only per property or crop
# NOTE(review): Grouped2 is grouped on EVTYPE as stored in Grouped1, so these
# totals use the original labels rather than the consolidated labels applied
# to Grouped3 -- confirm that this is intended.
damage <- summarise(
  select(Grouped2, EVTYPE, prop, crop),
  propsum = sum(prop),
  cropsum = sum(crop)
)
# Ten largest event types per damage kind, in the layout (EVTYPE, category, sum)
proporder <- damage %>%
  arrange(desc(propsum)) %>%
  head(n = 10) %>%
  mutate(category = "prop") %>%
  select(EVTYPE, category, sum = propsum)
croporder <- damage %>%
  arrange(desc(cropsum)) %>%
  head(n = 10) %>%
  mutate(category = "crop") %>%
  select(EVTYPE, category, sum = cropsum)
damagestack <- rbind(proporder, croporder)
For this section, I am going to assume that the human cost is FATALITIES + INJURIES, which I am calling totsum.
We will determine the top 10 of each by viewing a barchart of the incident types.
In Figure 1 below, you will notice that Tornado and Heat related Event Types have the highest total Incidents. Tornados in general have the highest number of incidents, whether counted by fatalities, injuries, or the total of the two.
library(ggplot2)
##
## Attaching package: 'ggplot2'
##
## The following object is masked from 'package:qdapRegex':
##
## %+%
library(lattice)
#barplot for incident by event type
# EVTYPE on the y axis against sum on the x axis, with one bar per category
# within each event type; the legend sits below the plot in three columns.
barchart(
  EVTYPE ~ sum,
  data = stacked,
  groups = category,
  main = "FIG 1. Top 10 reasons by Incident Type (fatal, injury, total) \n for all years",
  xlab = "Total Incidents",
  ylab = "Event Type",
  auto.key = list(space = "bottom", title = "", columns = 3)
)
In Figure 2 below, when it comes to monetary damage, Tornado and Heat no longer hold the top 2 spots, and there is less overlap between the top 10 spots for Crop vs. Property damage.
Floods are #1 for property damage at $144.7 Billion and droughts are #1 for crop damage at $14 Billion.
library(ggplot2)
library(lattice)
#barplot for economic damage to property and crops by event type
# EVTYPE on the y axis against sum on the x axis, with one bar per damage
# category within each event type. The x axis carries damage totals (the
# title states millions of US dollars), so it is labelled as damage rather
# than as incidents.
barchart(
  EVTYPE ~ sum,
  data = damagestack,
  groups = category,
  main = "FIG 2. Top 10 reasons by Damage Type (Property, Crop) \n for all years in Millions of US Dollars",
  xlab = "Total Damage (Millions of US Dollars)",
  ylab = "Event Type",
  auto.key = list(space = "bottom", title = "", columns = 2)
)
The previous analysis should be of help when planning emergency/disaster relief funding within the US at the State and Federal level. There is no question that the country as a whole should exercise caution when dealing with Floods from an economic perspective and Tornados from a Public Safety/Health perspective.