Synopsis

Between 1950 and 2011 the US experienced more than 15 000 fatalities and in excess of 140 000 injuries due to severe weather events, of which the most harmful were tornadoes, excessive heat, floods, and thunderstorm (TSTM) winds. During the same period, severe weather events caused more than $ 400 billion in property damage and approximately $ 50 billion in crop damage. Floods, hurricanes, typhoons and tornadoes caused the most property damage, while drought, floods and ice storms were the most damaging to crops.

Data Processing

## Set global thousand separator:
## every inline R result is passed through prettyNum() so that large numbers
## are printed with a space between groups of three digits (15145 -> "15 145").
## knit_hooks is namespace-qualified because knitr is not attached with
## library() until later in this document.
## NOTE(review): `digits = 0` is a second named argument to knit_hooks$set(),
## not an argument of prettyNum(); it is kept unchanged here -- confirm
## whether rounding of the inline values was what was intended.
knitr::knit_hooks$set(inline = function(x) {
    prettyNum(x, big.mark = " ")
}, digits = 0)

## Remember to set your working directory.
## The folder below exists only on the original author's machine, so it is
## used only when present; on any other machine the analysis simply runs in
## the current working directory instead of stopping with an error.
projectDir <- "C:\\Users\\olalie\\Documents\\DataScience\\5_Reproducible_Research\\COUSE_PROJECTS\\PA2"
if (dir.exists(projectDir)) {
    setwd(projectDir)
}

## Get the raw data (only downloaded when no local copy exists yet):
if (!file.exists("repdata-data-StormData.csv.bz2")) {

    ## Download files...
    ## mode = "wb" requests a binary transfer so the compressed archive is
    ## not altered by a text-mode download on Windows.
    ## (you might need to add argument: method = "curl")
    download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
                  destfile = "repdata-data-StormData.csv.bz2",
                  mode = "wb")
}

## Read the storm data into SWE (SWE = Severe Weather Events).
## The file is parsed only when the session does not already hold a data
## frame called SWE, so re-running the script skips the slow read.
if (!exists("SWE") || !is.data.frame(get("SWE"))) {
    SWE <- read.csv("repdata-data-StormData.csv.bz2")
}

## Keep just the variables this analysis uses: event date and type,
## the two casualty counts, and the damage values with their exponent codes
library(dplyr)
SWE <- SWE %>%
    select(BGN_DATE, EVTYPE,
           FATALITIES, INJURIES,
           PROPDMG, PROPDMGEXP,
           CROPDMG, CROPDMGEXP)

## Parse BGN_DATE (month/day/year text) into Date values and derive a
## numeric MONTH column from the parsed date (used by the monthly plot).
## mutate() evaluates its arguments in order, so MONTH sees the parsed date.
SWE <- mutate(SWE,
              BGN_DATE = as.Date(BGN_DATE, format = "%m/%d/%Y"),
              MONTH = as.numeric(format(BGN_DATE, "%m")))

## Use English date names when formatting dates.
## "English" is the Windows locale name (there it resolves to
## "English_United States.1252"). Sys.setlocale() returns "" when a locale
## name is not recognised; in that case fall back to the portable "C"
## locale, which also yields English month names.
if (!nzchar(suppressWarnings(Sys.setlocale("LC_TIME", "English")))) {
    Sys.setlocale("LC_TIME", "C")
}
## First and last event date as text for the Results section;
## na.rm = TRUE keeps a single unparsable date from turning both into NA.
startDate <- format(min(SWE$BGN_DATE, na.rm = TRUE), format = "%B %d. %Y")
finishDate <- format(max(SWE$BGN_DATE, na.rm = TRUE), format = "%B %d. %Y")

## Data Transformations:
## Multiply values for PROPDMG and CROPDMG with their exponents.
##
## multiplier(exp) translates damage exponent codes into numeric factors.
##   exp     exponent code(s): a character vector or factor. A single code
##           (as passed by mapply/vapply) works exactly as a vector of one.
##   returns an unnamed numeric vector with one factor per code:
##           H = 100, K = 1000, M = 1000000, B = 1000000000.
## Codes are matched case-insensitively, so lower-case codes such as "k" or
## "m" get the same factor as "K" and "M" instead of silently becoming 1.
## Everything else (including "" and NA) is insignificant and given 1.
multiplier <- function(exp) {
    factors <- c(H = 100, K = 1000, M = 1000000, B = 1000000000)
    ## name lookup; codes without an entry come back as NA
    result <- unname(factors[toupper(as.character(exp))])
    result[is.na(result)] <- 1
    result
}

## Damage amount = recorded value times the factor for its exponent code.
## vapply() with numeric(1) guarantees exactly one numeric factor per row
## (and an empty numeric vector for empty input), which the automatic
## simplification of mapply() does not.
SWE$PROPCASH <- SWE$PROPDMG *
    vapply(as.character(SWE$PROPDMGEXP), multiplier, numeric(1),
           USE.NAMES = FALSE)

SWE$CROPCASH <- SWE$CROPDMG *
    vapply(as.character(SWE$CROPDMGEXP), multiplier, numeric(1),
           USE.NAMES = FALSE)

## Grand totals over all events. They are quoted in the text below and are
## the denominators for the monthly percentages in the plot.
totFatalities <- sum(SWE[["FATALITIES"]])
totInjuries <- sum(SWE[["INJURIES"]])
totPropCash <- sum(SWE[["PROPCASH"]])
totCropCash <- sum(SWE[["CROPCASH"]])

Results

The recorded data include events between January 03. 1950 and November 30. 2011.

Total fatalities were 15 145.

Total injuries were 140 528.

Total property damage was $ 427 279 752 813.

Total crop damage was $ 49 093 756 627.

Fatalities

Most fatalities were caused by tornadoes, excessive heat, and flash floods.

library(knitr)
library(dplyr, quietly = TRUE)
## Ten event types with the highest total number of fatalities:
## sum per event type, sort descending, keep the first ten rows, print as table
SWE %>%
    group_by(EVTYPE) %>%
    summarise(SUMFATALITIES = sum(FATALITIES)) %>%
    arrange(desc(SUMFATALITIES)) %>%
    head(n = 10) %>%
    kable(col.names = c("Top Ten Event Types", "Fatalities"))
Top Ten Event Types Fatalities
TORNADO 5633
EXCESSIVE HEAT 1903
FLASH FLOOD 978
HEAT 937
LIGHTNING 816
TSTM WIND 504
FLOOD 470
RIP CURRENT 368
HIGH WIND 248
AVALANCHE 224

Injuries

Most injuries were caused by tornadoes, thunderstorm (TSTM) winds, and floods.

library(knitr)
library(dplyr)
## Ten event types with the highest total number of injuries:
## sum per event type, sort descending, keep the first ten rows, print as table
SWE %>%
    group_by(EVTYPE) %>%
    summarise(SUMINJURIES = sum(INJURIES)) %>%
    arrange(desc(SUMINJURIES)) %>%
    head(n = 10) %>%
    kable(col.names = c("Top Ten Event Types", "Injuries"))
Top Ten Event Types Injuries
TORNADO 91346
TSTM WIND 6957
FLOOD 6789
EXCESSIVE HEAT 6525
LIGHTNING 5230
HEAT 2100
ICE STORM 1975
FLASH FLOOD 1777
THUNDERSTORM WIND 1488
HAIL 1361

Property Damage

Most property damage was caused by floods, hurricanes/typhoons, and tornadoes.

library(knitr)
library(dplyr)
## Ten event types with the highest total property damage:
## sum per event type, sort descending, keep the first ten rows, print as table
SWE %>%
    group_by(EVTYPE) %>%
    summarise(SUMPROPCASH = sum(PROPCASH)) %>%
    arrange(desc(SUMPROPCASH)) %>%
    head(n = 10) %>%
    kable(col.names = c("Top Ten Event Types", "Property Damage"))
Top Ten Event Types Property Damage
FLOOD 144657709807
HURRICANE/TYPHOON 69305840000
TORNADO 56925660790
STORM SURGE 43323536000
FLASH FLOOD 16140812067
HAIL 15727367548
HURRICANE 11868319010
TROPICAL STORM 7703890550
WINTER STORM 6688497251
HIGH WIND 5270046295

Crop Damage

Most crop damage was caused by drought, floods, and ice storms.

library(knitr)
library(dplyr)
## Ten event types with the highest total crop damage:
## sum per event type, sort descending, keep the first ten rows, print as table
SWE %>%
    group_by(EVTYPE) %>%
    summarise(SUMCROPCASH = sum(CROPCASH)) %>%
    arrange(desc(SUMCROPCASH)) %>%
    head(n = 10) %>%
    kable(col.names = c("Top Ten Event Types", "Crop Damage"))
Top Ten Event Types Crop Damage
DROUGHT 13972566000
FLOOD 5661968450
RIVER FLOOD 5029459000
ICE STORM 5022113500
HAIL 3025537890
HURRICANE 2741910000
HURRICANE/TYPHOON 2607872800
FLASH FLOOD 1421317100
EXTREME COLD 1292973000
FROST/FREEZE 1094086000

Monthly variations

Most fatalities occurred in July, April, and May.

Most injuries occurred in April, May, and June.

Most property damage occurred in January, August, and September.

Most crop damage occurred in August, September, and June.

library(dplyr)
## Totals of the four harm measures per calendar month, ordered by month.
## summarise() already returns MONTH followed by the four sums.
TOTBYMONTH <- SWE %>%
    group_by(MONTH) %>%
    summarise(SUMFATALITIES = sum(FATALITIES),
              SUMINJURIES = sum(INJURIES),
              SUMPROPCASH = sum(PROPCASH),
              SUMCROPCASH = sum(CROPCASH)) %>%
    arrange(MONTH)

tbm <- t(TOTBYMONTH) ## transpose: one row per measure, one column per month
## Label each column with the month it actually holds, rather than assuming
## that all twelve months are present in January..December order.
colnames(tbm) <- month.abb[TOTBYMONTH$MONTH]
tbm <- tbm[-1, ] ## remove row with month number

## percentage multiplier: one factor (100 / grand total) per measure, in the
## same order as the rows of tbm
pm <- matrix(data = c(100 / totFatalities, 100 / totInjuries,
                      100 / totPropCash, 100 / totCropCash),
             nrow = 4, ncol = 1)

## calculate percentages: the length-4 vector is recycled down each column,
## so row i of tbm is multiplied by pm[i]; p keeps the dimensions of tbm
p <- tbm * as.vector(pm)

## Grouped bars: for every month one bar per measure, each showing that
## month's share of the measure's total over all months
barplot(p, beside = TRUE,
        main = "Percentage Fatalities, Injuries, Crop- and Property Damage by Month",
        ylab = "A month's contribution as a percentage of all months",
        xlab = "Month",
        col = c("lightskyblue3", "maroon2", "olivedrab3", "orange2"),
        legend = c("Fatalities", "Injuries", "Property Damage", "Crop Damage"),
        space = c(0, 3),
        args.legend = list(x = "topright", cex = 1.0), ylim = c(0, 30))