Synopsis

This project aims to determine which types of weather events in the United States cause the most damage to (a) human health and (b) the economy. The underlying data comes from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.

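The bzip2 file was downloaded from the course website, the csv file was extracted and the data was loaded into R for analysis. Blank or NA values were ignored. From the data, tornadoes caused the most fatalities and injuries, floods caused the most property damage and the most economic damage overall, and droughts caused the most crop damage.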

Data Processing

This section describes how the data was downloaded, loaded into R and processed for analysis.

Downloading the Data

The data comes in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. You can download the file from the course web site: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2

library(R.utils)
## Warning: package 'R.utils' was built under R version 3.2.5
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.20.0 (2016-02-17) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, gc, load, save
## R.utils v2.3.0 (2016-04-13) successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Source archive (bzip2-compressed CSV) and the local paths used for it.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_archive <- "./storm.csv.bz2"
storm_csv <- "./storm.csv"

# Download the archive only when it is not already in the working directory.
# mode = "wb" requests a binary transfer explicitly, so the compressed bytes
# are written unchanged instead of relying on download.file() to infer the
# mode from the URL.
if (!file.exists(storm_archive)) {
  download.file(storm_url, storm_archive, mode = "wb")
}

# Decompress to CSV only when the CSV is missing; remove = FALSE keeps the
# archive on disk, so the download check above stays satisfied on later runs.
if (!file.exists(storm_csv)) {
  bunzip2(storm_archive, storm_csv, remove = FALSE)
}

Loading the data

# Read the decompressed CSV (first row holds the column names) into `storm`.
storm <- read.csv(file = "./storm.csv", header = TRUE)

Processing the Data

To make computations faster, select only relevant columns to determine property damage and effects on human health, storing the result in another data frame

# Columns needed for the health and economic analyses: event type, casualty
# counts, and the damage amounts with their exponent codes.
relevant_col <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
relevant_data <- storm[, relevant_col]

Processing data for Human Health outcomes

To see the effect of the weather events on human health in terms of fatalities and injuries, the data is first grouped by event type, before being sorted for fatalities and injuries

# Group the records by event type so totals can be computed per event.
events_group <- group_by(relevant_data, EVTYPE)

# Event types with the ten largest fatality totals, largest first (top_n()
# keeps ties at the cut-off). The ranking column is passed to top_n()
# explicitly instead of relying on its default of the last column, and
# na.rm = TRUE keeps a missing count from turning an event's total into NA.
top_fatalities <- events_group %>%
  summarize(total = sum(FATALITIES, na.rm = TRUE)) %>%
  arrange(desc(total)) %>%
  top_n(10, total)

# Event types with the ten largest injury totals, ranked the same way.
top_injuries <- events_group %>%
  summarize(total = sum(INJURIES, na.rm = TRUE)) %>%
  arrange(desc(total)) %>%
  top_n(10, total)

Processing data for economic outcomes

Both property damage and crop damage have an exponent column that indicates whether the damage is in hundreds, thousands, millions or billions. The numerical damage needs to be multiplied by the corresponding factor to obtain the total damage amount.

# List the distinct codes found in each damage exponent column.
unique(relevant_data[["PROPDMGEXP"]])
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
unique(relevant_data[["CROPDMGEXP"]])
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M
Processing Property Damage
# Convert the PROPDMGEXP codes into powers of ten.
prop_exp <- as.character(relevant_data$PROPDMGEXP)
# Blank codes are empty strings, not NA. Left as "", as.numeric() turns them
# into NA, the damage becomes NA, and the formula aggregate() below drops
# those rows. Treat blank and missing codes as exponent 0 (multiplier 1).
prop_exp[is.na(prop_exp) | !nzchar(prop_exp)] <- "0"
# Symbol codes are mapped to exponent 0 as well.
prop_exp <- gsub("[-+?]", "0", prop_exp)
# Letter codes, either case: hundreds, thousands, millions, billions.
prop_exp <- gsub("[Hh]", "2", prop_exp)
prop_exp <- gsub("[Kk]", "3", prop_exp)
prop_exp <- gsub("[Mm]", "6", prop_exp)
prop_exp <- gsub("[Bb]", "9", prop_exp)
# Digit codes are used directly as the exponent.
relevant_data$PROPDMGEXP <- as.numeric(prop_exp)
relevant_data$Total_Property_Damage <-
  relevant_data$PROPDMG * 10^relevant_data$PROPDMGEXP

# Sum the damage per event type, sort largest first, and keep the first ten
# rows (head() returns fewer rows rather than NA rows if fewer exist).
property_damage <- aggregate(
  Total_Property_Damage ~ EVTYPE,
  data = relevant_data,
  sum
)
property_damage_ordered <-
  property_damage[order(-property_damage$Total_Property_Damage), ]
property_damage_ordered_top_ten <- head(property_damage_ordered, 10)
Processing Crop Damage
# Convert the CROPDMGEXP codes into powers of ten.
crop_exp <- as.character(relevant_data$CROPDMGEXP)
# Blank codes are empty strings, not NA. Left as "", as.numeric() turns them
# into NA, the damage becomes NA, and the formula aggregate() below drops
# those rows. Treat blank and missing codes as exponent 0 (multiplier 1).
crop_exp[is.na(crop_exp) | !nzchar(crop_exp)] <- "0"
# Symbol codes are mapped to exponent 0 as well.
crop_exp <- gsub("[-+?]", "0", crop_exp)
# Letter codes, either case: hundreds, thousands, millions, billions.
crop_exp <- gsub("[Hh]", "2", crop_exp)
crop_exp <- gsub("[Kk]", "3", crop_exp)
crop_exp <- gsub("[Mm]", "6", crop_exp)
crop_exp <- gsub("[Bb]", "9", crop_exp)
# Digit codes are used directly as the exponent.
relevant_data$CROPDMGEXP <- as.numeric(crop_exp)
relevant_data$Total_Crop_Damage <-
  relevant_data$CROPDMG * 10^relevant_data$CROPDMGEXP

# Sum the damage per event type, sort largest first, and keep the first ten
# rows (head() returns fewer rows rather than NA rows if fewer exist).
crop_damage <- aggregate(
  Total_Crop_Damage ~ EVTYPE,
  data = relevant_data,
  sum
)
crop_damage_ordered <-
  crop_damage[order(-crop_damage$Total_Crop_Damage), ]
crop_damage_ordered_top_ten <- head(crop_damage_ordered, 10)
Calculating Total Economic Damage

To calculate the total economic impact, the total property damage is added to the total crop damage, for each event.

# Per-record economic damage = property damage + crop damage.
# rowSums(na.rm = TRUE) counts a missing component as 0. A formula-based
# aggregate() would instead omit every record with NA in either column, which
# can leave an event's "total" smaller than its property damage alone.
record_total <- rowSums(
  relevant_data[c("Total_Property_Damage", "Total_Crop_Damage")],
  na.rm = TRUE
)

# Sum per event type; the result has the columns EVTYPE and Total.
total_damage <- aggregate(
  data.frame(Total = record_total),
  by = list(EVTYPE = relevant_data$EVTYPE),
  FUN = sum
)

# Largest totals first; rank explicitly by Total rather than relying on
# top_n()'s default of the last column.
total_damage_top_ten <- arrange(total_damage, desc(Total)) %>%
  top_n(10, Total)

Results

Weather Events effects on Public health in the United States

The following bar graphs show the number of fatalities and the number of injuries occurring as a result of different types of weather events.

# Draw the two charts side by side. par() returns the previous settings, which
# are restored after plotting so the layout does not leak into later plots.
old_par <- par(mfrow = c(1, 2))

# names.arg is spelled out in full; the abbreviated `names` only works through
# partial argument matching. las = 2 draws the axis labels perpendicular to
# the axis.
barplot(
  top_fatalities$total,
  names.arg = top_fatalities$EVTYPE,
  ylab = "Total Fatalities",
  main = "Fatalities by Weather Event Type",
  las = 2
)
barplot(
  top_injuries$total,
  names.arg = top_injuries$EVTYPE,
  ylab = "Total Injuries",
  main = "Injuries by Weather Event Type",
  las = 2
)

par(old_par)

From the bar graphs above, tornadoes are the deadliest weather event, and they also cause the most injuries.

Weather Events effects on the economy in the United States

The following tables show the weather events that caused the most property damage, crop damage and total damage.

# Print the ten event types with the largest total property damage,
# largest first.
property_damage_ordered_top_ten
##                EVTYPE Total_Property_Damage
## 63              FLOOD          144657709800
## 181 HURRICANE/TYPHOON           69305840000
## 335           TORNADO           56947380674
## 283       STORM SURGE           43323536000
## 51        FLASH FLOOD           16822673772
## 105              HAIL           15735267456
## 173         HURRICANE           11868319010
## 343    TROPICAL STORM            7703890550
## 402      WINTER STORM            6688497251
## 158         HIGH WIND            5270046295
# Print the ten event types with the largest total crop damage,
# largest first.
crop_damage_ordered_top_ten
##               EVTYPE Total_Crop_Damage
## 16           DROUGHT       13972566000
## 35             FLOOD        5661968450
## 99       RIVER FLOOD        5029459000
## 86         ICE STORM        5022113500
## 53              HAIL        3025954470
## 78         HURRICANE        2741910000
## 83 HURRICANE/TYPHOON        2607872800
## 30       FLASH FLOOD        1421317100
## 26      EXTREME COLD        1292973000
## 47      FROST/FREEZE        1094086000
# Print the event types with the largest combined (property + crop) damage,
# largest first.
total_damage_top_ten
##               EVTYPE        Total
## 1              FLOOD 138007444500
## 2  HURRICANE/TYPHOON  29348167800
## 3            TORNADO  16570326363
## 4          HURRICANE  12405268000
## 5        RIVER FLOOD  10108369000
## 6               HAIL  10048596590
## 7        FLASH FLOOD   8716525177
## 8          ICE STORM   5925150850
## 9   STORM SURGE/TIDE   4641493000
## 10 THUNDERSTORM WIND   3813647990

The bar graphs below show the economic damage (in billions of dollars) resulting from the different weather events.

# Draw the three charts side by side. par() returns the previous settings,
# which are restored after plotting so the layout does not leak into later
# plots.
old_par <- par(mfrow = c(1, 3))

# Damage values are divided by 10^9 so the y axis reads in billions of
# dollars. names.arg is spelled out in full; the abbreviated `names` only
# works through partial argument matching. las = 2 draws the axis labels
# perpendicular to the axis.
barplot(
  total_damage_top_ten$Total / 10^9,
  names.arg = total_damage_top_ten$EVTYPE,
  ylab = "Total Damage ($ Billions)",
  main = "Total Damage ($) by \n Weather Event",
  las = 2
)
barplot(
  property_damage_ordered_top_ten$Total_Property_Damage / 10^9,
  names.arg = property_damage_ordered_top_ten$EVTYPE,
  ylab = "Property Damage ($ Billions)",
  main = "Property Damage ($) by \n Weather Event",
  las = 2
)
barplot(
  crop_damage_ordered_top_ten$Total_Crop_Damage / 10^9,
  names.arg = crop_damage_ordered_top_ten$EVTYPE,
  ylab = "Crop Damage ($ Billions)",
  main = "Crop Damage ($) by \n Weather Event",
  las = 2
)

par(old_par)

From the bar graphs above, we see that floods caused the most property damage, while droughts caused the most crop damage. Overall, floods caused the most economic damage.