Synopsis

This is an analysis of the severity of weather events across the US. The data come from the NOAA Storm Database.

First, we must download our data and document when and where we got it.

# Work from the course folder so the relative data path resolves there.
setwd("~/Box Sync/Coursera/Reproducible Research/Week 3")

# Fetch the raw storm data only when no local copy exists yet.
# The URL points at a bzip2 archive, yet it is stored under a .csv name.
if (!file.exists("repdata-data-StormData.csv")) {
  fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(fileUrl, "repdata-data-StormData.csv", method = "curl")
  # Record when the file was retrieved; only set when a download happens.
  downloadDate <- date()
}

Our next task is to read in the data and look at its format

# Load the raw storm records, then show the table's dimensions and first rows.
storm <- read.csv("repdata-data-StormData.csv")
dim(storm)
## [1] 902297     37
head(storm)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6

Data Processing

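Here, we will determine which types of events have the greatest economic consequences across the US. The exponent codes in the PROPDMGEXP and CROPDMGEXP columns have been interpreted as follows: 0 = one (no scaling); 1 = ten; H, h, 2 = hundred; K, k, 3 = thousand; 4 = ten thousand; 5 = hundred thousand; M, m, 6 = million; 7 = ten million; 8 = hundred million; B, b, 9 = billion; and -, +, ? = NA. Any other code (including a blank) leaves the damage value unscaled.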

library(dplyr)
names(storm) <- tolower(names(storm)) # Make column names lower case for easier typesetting

# Translate damage exponent codes into numeric multipliers.
#
# code: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length:
#   B, b, 9 -> 1e9    M, m, 6 -> 1e6    K, k, 3 -> 1e3    H, h, 2 -> 1e2
#   0 -> 1, 1 -> 10, 4 -> 1e4, 5 -> 1e5, 7 -> 1e7, 8 -> 1e8
#   -, +, ? -> NA
#   NA stays NA; any other code (including "") -> 1, i.e. no scaling.
exp_multiplier <- function(code) {
  code <- as.character(code)
  lookup <- c(
    "B" = 1e9, "b" = 1e9, "9" = 1e9,
    "M" = 1e6, "m" = 1e6, "6" = 1e6,
    "K" = 1e3, "k" = 1e3, "3" = 1e3,
    "H" = 1e2, "h" = 1e2, "2" = 1e2,
    "0" = 1, "1" = 1e1, "4" = 1e4, "5" = 1e5, "7" = 1e7, "8" = 1e8,
    "-" = NA, "+" = NA, "?" = NA
  )
  multiplier <- unname(lookup[code])
  # Codes missing from the table are treated as "no scaling", not as NA.
  unrecognised <- !is.na(code) & !(code %in% names(lookup))
  multiplier[unrecognised] <- 1
  multiplier
}

# Keep the event type and damage columns, then add the scaled damage amounts.
# Both damage kinds share one lookup, so a given code always scales the same
# way; previously crop code "8" was scaled by 1e5 instead of 1e8.
damage <- storm %>%
  select(evtype, propdmg, propdmgexp, cropdmg, cropdmgexp) %>%
  mutate(
    # Convert property damage data for calculation
    propexpense = propdmg * exp_multiplier(propdmgexp),
    # Convert crop damage data for calculation
    cropexpense = cropdmg * exp_multiplier(cropdmgexp)
  )

# Total the scaled damage per event type, largest first.
# The scaled columns (propexpense / cropexpense) are summed here; the raw
# propdmg / cropdmg values ignore the exponent codes and are not comparable
# amounts. The totals keep the names propdmg / cropdmg so that later steps
# referring to those columns continue to work.
# NOTE(review): printed output and conclusions recorded from an earlier run
# reflect the unscaled sums and should be regenerated.
totalpdamages <- arrange(
  setNames(aggregate(propexpense ~ evtype, data = damage, FUN = sum),
           c("evtype", "propdmg")),
  desc(propdmg)
)
totalcdamages <- arrange(
  setNames(aggregate(cropexpense ~ evtype, data = damage, FUN = sum),
           c("evtype", "cropdmg")),
  desc(cropdmg)
)

# Ten largest totals of each kind.
prop <- top_n(totalpdamages, 10, propdmg)
crop <- top_n(totalcdamages, 10, cropdmg)

# merge() with its default settings keeps only event types present in both
# top-ten lists.
top <- merge(prop, crop, by = "evtype")
head(top)
##               evtype   propdmg   cropdmg
## 1        FLASH FLOOD 1420124.6 179200.46
## 2              FLOOD  899938.5 168037.88
## 3               HAIL  688693.4 579596.28
## 4          HIGH WIND  324731.6  17283.21
## 5  THUNDERSTORM WIND  876844.2  66791.45
## 6 THUNDERSTORM WINDS  446293.2  18684.93
library(tidyr)
# Stack the two damage columns into Type/Expense pairs so each damage kind can
# be drawn in its own facet.
topdamage <- gather(top, Type, Expense, c(propdmg, cropdmg))
head(topdamage)
##               evtype    Type   Expense
## 1        FLASH FLOOD propdmg 1420124.6
## 2              FLOOD propdmg  899938.5
## 3               HAIL propdmg  688693.4
## 4          HIGH WIND propdmg  324731.6
## 5  THUNDERSTORM WIND propdmg  876844.2
## 6 THUNDERSTORM WINDS propdmg  446293.2

Results

# Plot
# ggplot2 is attached here because this is the first ggplot() call in the
# document; the only other library(ggplot2) appears further down.
library(ggplot2)
# One bar per event type, with a separate facet row for each damage Type.
ggplot(topdamage, aes(x = evtype, y = Expense, fill = evtype)) +
  geom_bar(stat = 'identity') +
  facet_grid(Type ~ .) +
  ggtitle(label = "Economic damage across the US") +
  xlab("") +
  ylab("Expense in $")

This graph tells us that tornadoes are responsible for the most property damage, while hail is responsible for the most crop damage.

The next code chunk determines which types of events are most harmful with respect to population health across the US.

# Attach the packages used to wrangle and plot the health figures.
library(dplyr)
library(tidyr)
library(ggplot2)

# Keep only the event type and the two casualty counts.
health <- storm %>% select(evtype, fatalities, injuries)

# Sum each casualty measure per event type, largest totals first.
fatal <- aggregate(fatalities ~ evtype, data = health, FUN = sum) %>%
  arrange(desc(fatalities))
harmful <- aggregate(injuries ~ evtype, data = health, FUN = sum) %>%
  arrange(desc(injuries))

# Ten highest totals for each measure.
topfatals <- fatal %>% top_n(10, fatalities)
topinjuries <- harmful %>% top_n(10, injuries)

# merge() with its default settings keeps only event types found in both lists.
harm <- merge(topfatals, topinjuries, by = "evtype")

# Stack the two measures into harmt/count pairs for faceted plotting.
topharm <- gather(harm, harmt, count, c(fatalities, injuries))

# Plots
# One bar per event type, with a separate facet row for each harm type.
# The x-axis title is blanked with xlab(""): `xlab` is not an aesthetic, so
# supplying it inside aes() does not set the axis title.
ggplot(topharm, aes(x = evtype, y = count, fill = evtype)) +
  geom_bar(stat = 'identity') +
  facet_grid(harmt ~ .) +
  ggtitle(label = "Top 10 harmful events across the US") +
  xlab("")

The graphs above show us that tornadoes are responsible for the most fatalities and injuries.