This project aims to answer the question of which types of weather events cause the most damage to (a) human health and (b) the economy in the United States. The underlying data comes from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.
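The bzip2 file was downloaded from the course website, the CSV file was extracted, and the data was loaded into R for analysis. Blank or NA values were ignored. From the data, tornadoes were found to be the most harmful weather event for human health (both fatalities and injuries), while floods caused the most economic damage overall.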
This section describes how the data was downloaded, loaded into R and processed for analysis.
The data comes in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. You can download the file from the course web site: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
library(R.utils)
## Warning: package 'R.utils' was built under R version 3.2.5
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.20.0 (2016-02-17) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
## R.utils v2.3.0 (2016-04-13) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Fetch the compressed storm data into the working directory unless a copy
# is already present.
if (!file.exists("./storm.csv.bz2")) {
  download.file(
    url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "./storm.csv.bz2"
  )
}

# Decompress to CSV only when the CSV is missing; the archive is kept
# (remove = FALSE) so it does not need to be downloaded again.
if (!file.exists("./storm.csv")) {
  bunzip2("./storm.csv.bz2", "./storm.csv", remove = FALSE)
}

# Read the full storm data set; the first row of the file holds column names.
storm <- read.csv(file = "./storm.csv", header = TRUE)
To make computations faster, select only relevant columns to determine property damage and effects on human health, storing the result in another data frame
# Columns needed for the health (fatalities, injuries) and economic
# (property and crop damage, each with its exponent code) analyses.
relevant_col <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
# Keep only those columns, in the order listed above.
relevant_data <- storm[, relevant_col, drop = FALSE]
To see the effect of the weather events on human health in terms of fatalities and injuries, the data is first grouped by event type, before being sorted for fatalities and injuries
# Group the selected columns by event type; reused for the injuries summary.
events_group <- relevant_data %>% group_by(EVTYPE)

# Sum fatalities per event type, sort descending, and keep the ten largest
# totals. top_n() is called without `wt`, so it ranks by the last column
# (`total`) and reports that choice in a message.
top_fatalities <- events_group %>%
  summarize(total = sum(FATALITIES)) %>%
  arrange(desc(total)) %>%
  top_n(10)
## Selecting by total
# Sum injuries per event type, sort descending, and keep the ten largest
# totals. top_n() is called without `wt`, so it ranks by the last column
# (`total`) and reports that choice in a message.
top_injuries <- events_group %>%
  summarize(total = sum(INJURIES)) %>%
  arrange(desc(total)) %>%
  top_n(10)
## Selecting by total
Both property damage and crop damage have an exponent column that indicates whether the damage is expressed in hundreds, thousands, millions or billions. The numerical damage value needs to be multiplied by the corresponding factor to obtain the total damage amount.
unique(relevant_data[["PROPDMGEXP"]])
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
unique(relevant_data[["CROPDMGEXP"]])
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M
# Translate damage-exponent codes into numeric powers of ten.
#
# Args:
#   code: factor or character vector of exponent codes
#         (the PROPDMGEXP / CROPDMGEXP columns).
# Returns:
#   Numeric vector of exponents: H/h -> 2, K/k -> 3, M/m -> 6, B/b -> 9,
#   digit codes are used as-is, and "+", "-", "?", blank or NA -> 0
#   (i.e. a multiplier of 10^0 = 1).
damage_exponent <- function(code) {
  code <- toupper(as.character(code))
  # Blank fields in a text column are read by read.csv() as "" rather than
  # NA, so an is.na() check alone never matches them. Left untreated they
  # become NA in as.numeric() and the affected rows are silently dropped by
  # aggregate()'s default na.action.
  code[is.na(code) | code %in% c("", "+", "-", "?")] <- "0"
  letter_powers <- c(H = "2", K = "3", M = "6", B = "9")
  is_letter <- code %in% names(letter_powers)
  code[is_letter] <- unname(letter_powers[code[is_letter]])
  as.numeric(code)
}

# Property damage: scale each amount by its exponent, total it per event
# type, and keep the ten most damaging event types.
relevant_data$PROPDMGEXP <- damage_exponent(relevant_data$PROPDMGEXP)
relevant_data$Total_Property_Damage <-
  relevant_data$PROPDMG * 10^relevant_data$PROPDMGEXP
property_damage <- aggregate(
  Total_Property_Damage ~ EVTYPE,
  data = relevant_data,
  FUN = sum
)
property_damage_ordered <-
  property_damage[order(-property_damage$Total_Property_Damage), ]
# head() avoids padding with NA rows if there are fewer than ten event types.
property_damage_ordered_top_ten <- head(property_damage_ordered, 10)

# Crop damage: same treatment as property damage.
relevant_data$CROPDMGEXP <- damage_exponent(relevant_data$CROPDMGEXP)
relevant_data$Total_Crop_Damage <-
  relevant_data$CROPDMG * 10^relevant_data$CROPDMGEXP
crop_damage <- aggregate(
  Total_Crop_Damage ~ EVTYPE,
  data = relevant_data,
  FUN = sum
)
crop_damage_ordered <- crop_damage[order(-crop_damage$Total_Crop_Damage), ]
crop_damage_ordered_top_ten <- head(crop_damage_ordered, 10)
To calculate the total economic impact, the total property damage is added to the total crop damage, for each event.
# Total economic damage per event type = property damage + crop damage.
#
# The two components are added with NA treated as 0, so a row that lacks one
# component still contributes the other. Passing
# `Total_Property_Damage + Total_Crop_Damage ~ EVTYPE` straight to aggregate()
# drops every row in which either column is NA (default na.action = na.omit),
# which understates the totals: an event type's total can then come out lower
# than its property damage alone.
combined_damage <- data.frame(
  EVTYPE = relevant_data$EVTYPE,
  Total = rowSums(
    relevant_data[c("Total_Property_Damage", "Total_Crop_Damage")],
    na.rm = TRUE
  )
)
total_damage <- aggregate(Total ~ EVTYPE, data = combined_damage, FUN = sum)

# Sort descending and keep the ten largest totals; top_n() without `wt` ranks
# by the last column, which is `Total`.
total_damage_top_ten <- arrange(total_damage, desc(Total)) %>% top_n(10)
## Selecting by Total
The following bar graphs show the number of fatalities and the number of injuries occurring as a result of different types of weather events.
# Draw the fatalities and injuries charts side by side (1 row, 2 columns).
par(mfrow = c(1, 2))

# `names.arg` is spelled out in full rather than relying on partial matching
# of `names`; las = 2 rotates the event-type labels perpendicular to the axis.
barplot(
  top_fatalities$total,
  names.arg = top_fatalities$EVTYPE,
  ylab = "Total Fatalities",
  main = "Fatalities by Weather Event Type",
  las = 2
)
barplot(
  top_injuries$total,
  names.arg = top_injuries$EVTYPE,
  ylab = "Total Injuries",
  main = "Injuries by Weather Event Type",
  las = 2
)
From the bar graphs above, tornadoes are the deadliest weather event, and they also cause the most injuries.
The following tables show the weather events that caused the most property damage, crop damage and total damage.
# Ten event types with the highest total property damage
print(property_damage_ordered_top_ten)
## EVTYPE Total_Property_Damage
## 63 FLOOD 144657709800
## 181 HURRICANE/TYPHOON 69305840000
## 335 TORNADO 56947380674
## 283 STORM SURGE 43323536000
## 51 FLASH FLOOD 16822673772
## 105 HAIL 15735267456
## 173 HURRICANE 11868319010
## 343 TROPICAL STORM 7703890550
## 402 WINTER STORM 6688497251
## 158 HIGH WIND 5270046295
# Ten event types with the highest total crop damage
print(crop_damage_ordered_top_ten)
## EVTYPE Total_Crop_Damage
## 16 DROUGHT 13972566000
## 35 FLOOD 5661968450
## 99 RIVER FLOOD 5029459000
## 86 ICE STORM 5022113500
## 53 HAIL 3025954470
## 78 HURRICANE 2741910000
## 83 HURRICANE/TYPHOON 2607872800
## 30 FLASH FLOOD 1421317100
## 26 EXTREME COLD 1292973000
## 47 FROST/FREEZE 1094086000
# Ten event types with the highest combined (property + crop) damage
print(total_damage_top_ten)
## EVTYPE Total
## 1 FLOOD 138007444500
## 2 HURRICANE/TYPHOON 29348167800
## 3 TORNADO 16570326363
## 4 HURRICANE 12405268000
## 5 RIVER FLOOD 10108369000
## 6 HAIL 10048596590
## 7 FLASH FLOOD 8716525177
## 8 ICE STORM 5925150850
## 9 STORM SURGE/TIDE 4641493000
## 10 THUNDERSTORM WIND 3813647990
The bar graphs below show the economic damage (in billions) resulting from the different weather events.
# Draw the total, property and crop damage charts side by side
# (1 row, 3 columns). Amounts are divided by 10^9 to plot billions of dollars.
par(mfrow = c(1, 3))

# `names.arg` is spelled out in full rather than relying on partial matching
# of `names`; las = 2 rotates the event-type labels perpendicular to the axis.
barplot(
  total_damage_top_ten$Total / 10^9,
  names.arg = total_damage_top_ten$EVTYPE,
  ylab = "Total Damage ($ Billions)",
  main = "Total Damage ($) by \n Weather Event",
  las = 2
)
barplot(
  property_damage_ordered_top_ten$Total_Property_Damage / 10^9,
  names.arg = property_damage_ordered_top_ten$EVTYPE,
  ylab = "Property Damage ($ Billions)",
  main = "Property Damage ($) by \n Weather Event",
  las = 2
)
barplot(
  crop_damage_ordered_top_ten$Total_Crop_Damage / 10^9,
  names.arg = crop_damage_ordered_top_ten$EVTYPE,
  ylab = "Crop Damage ($ Billions)",
  main = "Crop Damage ($) by \n Weather Event",
  las = 2
)
From the bar graphs above, we see that floods caused the most property damage, while droughts caused the most crop damage. Overall, floods caused the most economic damage.