This is an analysis of the severity of weather events in the US. The data come from the NOAA Storm Database.
First, we must download the data and record when and where we obtained it.
setwd("~/Box Sync/Coursera/Reproducible Research/Week 3")
# Fetch the raw storm data only when no local copy is present, and note the
# time of retrieval. The download keeps its bzip2 compression even though the
# local file name ends in .csv.
if (!file.exists("repdata-data-StormData.csv")) {
  fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(fileUrl, "repdata-data-StormData.csv", method = "curl")
  downloadDate <- date()
}
Our next task is to read in the data and look at its format.
# Load the complete storm data set into a data frame.
storm <- read.csv("repdata-data-StormData.csv")
# Number of rows and columns.
dim(storm)
## [1] 902297 37
# Preview the first few records.
head(storm)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
Here, we determine which types of events have the greatest economic consequences across the US. The damage-exponent codes are interpreted as follows: 0 = one; 1 = ten; H, h, 2 = hundred; K, k, 3 = thousand; 4 = ten thousand; 5 = hundred thousand; M, m, 6 = million; 7 = ten million; 8 = hundred million; B, b, 9 = billion; and -, +, ? = NA.
library(dplyr)
names(storm) <- tolower(names(storm)) # Make column names lower case for easier typesetting

# Translate damage-exponent codes into numeric multipliers.
#
# code: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length:
#   * digits "0".."9" map to 10^digit;
#   * H/h = 1e2, K/k = 1e3, M/m = 1e6, B/b = 1e9;
#   * "-", "+" and "?" are treated as unknown and give NA;
#   * any other non-missing code (including the empty string) gives 1, so the
#     recorded amount is used as-is;
#   * a missing code gives NA.
# One shared table is used for both property and crop damage so the two
# conversions cannot drift apart (the crop conversion previously scaled code
# "8" by 1e5 instead of 1e8).
exp_multiplier <- function(code) {
  code <- as.character(code)
  known <- c(
    "0" = 1, "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
    "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8, "9" = 1e9,
    H = 1e2, h = 1e2, K = 1e3, k = 1e3,
    M = 1e6, m = 1e6, B = 1e9, b = 1e9
  )
  multiplier <- unname(known[code])
  # Unrecognised but present codes fall back to the raw amount ...
  multiplier[is.na(multiplier) & !is.na(code)] <- 1
  # ... except the explicit "unknown" markers, which stay missing.
  multiplier[code %in% c("-", "+", "?")] <- NA_real_
  multiplier
}

# Keep only the columns needed for the economic analysis and add the damage
# amounts scaled by their exponent codes.
damage <- storm %>%
  select(evtype, propdmg, propdmgexp, cropdmg, cropdmgexp) %>%
  mutate(
    propexpense = propdmg * exp_multiplier(propdmgexp),
    cropexpense = cropdmg * exp_multiplier(cropdmgexp)
  )
# Total the scaled damage (propexpense / cropexpense) per event type.
# Summing the raw propdmg / cropdmg magnitudes would mix units, because each
# magnitude is only meaningful together with its exponent code.
# The totals keep the column names propdmg / cropdmg so the reshaping and
# plotting code further down keeps working unchanged.
# By default aggregate() with a formula drops rows whose expense is NA.
# NOTE: printed totals captured from earlier runs reflect the unscaled sums
# and need to be regenerated.
totalpdamages <- aggregate(propexpense ~ evtype, data = damage, FUN = sum) %>%
  rename(propdmg = propexpense) %>%
  arrange(desc(propdmg))
totalcdamages <- aggregate(cropexpense ~ evtype, data = damage, FUN = sum) %>%
  rename(cropdmg = cropexpense) %>%
  arrange(desc(cropdmg))
# Ten costliest event types in each damage category.
prop <- top_n(totalpdamages, 10, propdmg)
crop <- top_n(totalcdamages, 10, cropdmg)
# Inner join: only event types present in both top-ten lists remain.
top <- merge(prop, crop, by = "evtype")
head(top)
## evtype propdmg cropdmg
## 1 FLASH FLOOD 1420124.6 179200.46
## 2 FLOOD 899938.5 168037.88
## 3 HAIL 688693.4 579596.28
## 4 HIGH WIND 324731.6 17283.21
## 5 THUNDERSTORM WIND 876844.2 66791.45
## 6 THUNDERSTORM WINDS 446293.2 18684.93
library(tidyr)
# Reshape to long format: one row per event type and damage category, with the
# category name in Type and the summed amount in Expense.
topdamage <- gather(top, Type, Expense, propdmg, cropdmg)
head(topdamage)
## evtype Type Expense
## 1 FLASH FLOOD propdmg 1420124.6
## 2 FLOOD propdmg 899938.5
## 3 HAIL propdmg 688693.4
## 4 HIGH WIND propdmg 324731.6
## 5 THUNDERSTORM WIND propdmg 876844.2
## 6 THUNDERSTORM WINDS propdmg 446293.2
# Plot
# ggplot2 has to be attached before ggplot() is called.
library(ggplot2)
# One bar per event type, with a separate panel for each damage category.
ggplot(topdamage, aes(x = evtype, y = Expense, fill = evtype)) +
  geom_bar(stat = "identity") +
  facet_grid(Type ~ .) +
  ggtitle(label = "Economic damage across the US") +
  xlab("") +
  ylab("Expense in $")
This graph tells us that tornadoes are responsible for the most property damage, while hail is responsible for the most crop damage.
The next code chunk determines which types of events are most harmful to population health across the US.
library(dplyr)
library(tidyr)
library(ggplot2)
# Keep only the columns needed for the population-health analysis.
health <- storm %>% select(evtype, fatalities, injuries)
# Total fatalities and injuries per event type, largest first.
fatal <- aggregate(fatalities ~ evtype, data = health, FUN = sum) %>%
  arrange(desc(fatalities))
harmful <- aggregate(injuries ~ evtype, data = health, FUN = sum) %>%
  arrange(desc(injuries))
# Ten worst event types for each measure.
topfatals <- fatal %>% top_n(10, fatalities)
topinjuries <- harmful %>% top_n(10, injuries)
# Inner join: only event types present in both top-ten lists remain.
harm <- merge(topfatals, topinjuries, by = "evtype")
# Long format: harmt names the measure, count holds its total.
topharm <- gather(harm, harmt, count, fatalities, injuries)
# Plots
# One bar per event type, with a separate panel for fatalities and injuries.
# The x-axis title is blanked with xlab(""); passing xlab inside aes() has no
# effect because it is not an aesthetic.
ggplot(topharm, aes(x = evtype, y = count, fill = evtype)) +
  geom_bar(stat = "identity") +
  facet_grid(harmt ~ .) +
  ggtitle(label = "Top 10 harmful events across the US") +
  xlab("")
The graphs above show us that tornadoes are responsible for the most fatalities and injuries.