Extreme weather conditions can cause great economic and health problems in communities. Some of them may cause injuries, property damage and even fatalities. This project explores the U.S. National Oceanic and Atmospheric Administration’s storm database (1950-2011) to understand some of the most harmful events.
The analysis focuses on answering two questions: i) Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? and ii) Across the United States, which types of events have the greatest economic consequences?
The conclusions are the following:
This analysis was performed using the Storm Events database provided by the National Climatic Data Center. The data come from a bzip2-compressed comma-separated-value file available at http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2. Documentation about the data is also available at https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf.
setwd("D:/02 Coursera/02 R/01 Johns Hopkings-Coursera/05 Reproducible Research/05 Project/repdata_data_StormData.csv")
RawData <- read.csv("repdata_data_StormData.csv", sep = ",", header = TRUE, na.strings = "NA")
# Fetch the compressed NOAA storm data and load it into RawData.
#
# The archive is stored in the current working directory. There is no setwd()
# call: a hard-coded absolute path only works on the machine it was written on.
storm_url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "StormData.csv.bz2"

# Skip the download when the archive is already present, so re-running the
# analysis does not fetch the file again.
if (!file.exists(storm_file)) {
  # mode = "wb": the archive is binary and must be written without any
  # text-mode translation.
  download.file(storm_url, destfile = storm_file, mode = "wb")
}

# read.csv() reads through the bzfile() connection, which decompresses the
# archive on the fly.
RawData <- read.csv(bzfile(storm_file), sep = ",", header = TRUE,
                    na.strings = "NA")
# Official event names; these are the category labels used in the results.
events <- c(
  "Astronomical Low Tide", "Avalanche", "Blizzard", "Coastal Flood",
  "Cold/Wind Chill", "Debris Flow", "Dense Fog", "Dense Smoke", "Drought",
  "Dust Devil", "Dust Storm", "Excessive Heat", "Extreme cold/Wind Chill",
  "Flash Flood", "Flood", "Freezing", "Frost/Freeze", "Funnel Cloud", "Hail",
  "Heat", "Heavy Rain", "Heavy Snow", "High Surf", "High Wind",
  "Hurricane/Typhoon", "Ice Storm", "Lakeshore Flood", "Lake-Effect Snow",
  "Lightning", "Marine Hail", "Marine High Wind", "Marine Strong Wind",
  "Marine Thunderstorm Wind", "Rip Current", "Seiche", "Sleet", "Storm Tide",
  "Strong Wind", "Thunderstorm Wind", "Tornado", "Tropical Depression",
  "Tropical Storm", "Tsunami", "Volcanic Ash", "Waterspout", "Wildfire",
  "Winter Storm", "Winter Weather"
)

# Search patterns, one per entry of `events` and in the same order. Most
# patterns are the event name itself; a handful carry extra "|"-separated
# alternatives appended after the name.
events_2 <- local({
  extra_patterns <- c(
    "Astronomical Low Tide" = "Low Tide",
    "Extreme cold/Wind Chill" = "Extreme Cold|Wind Chill",
    "Frost/Freeze" = "Frost|Freeze",
    "Hurricane/Typhoon" = "Hurricane|Typhoon",
    "Marine Thunderstorm Wind" = "Marine tstm Wind",
    "Thunderstorm Wind" = "tstm wind"
  )
  patterns <- events
  with_extra <- match(names(extra_patterns), events)
  patterns[with_extra] <- paste(events[with_extra], extra_patterns, sep = "|")
  patterns
})
# Strongly prefer fixed over scientific notation when printing numbers, so
# large totals are shown in full.
options(scipen = 999)

# Columns of the raw storm data that the analysis uses.
keep_cols <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP",
               "CROPDMG", "CROPDMGEXP")

# events and events_2 are paired by position; a length mismatch would label
# rows with the wrong event name.
stopifnot(length(events) == length(events_2))

# For every official event name, pull the raw rows whose EVTYPE matches the
# corresponding pattern in events_2 (case-insensitive) and tag them with the
# official name in a new `Names` column.
#
# NOTE(review): the patterns are unanchored and overlap (e.g. "Heat" also
# matches "EXCESSIVE HEAT", "Flood" also matches "FLASH FLOOD"), so a single
# raw row can be copied into several categories and is then counted once per
# category in the summaries. That matching behaviour is kept as it was;
# confirm whether it is intended.
matched <- lapply(seq_along(events), function(i) {
  hit <- grepl(events_2[i], RawData$EVTYPE, ignore.case = TRUE)
  piece <- RawData[hit, keep_cols]
  cbind(piece, Names = rep(events[i], nrow(piece)))
})

# Bind all pieces in one call instead of growing the data frame with rbind()
# inside a loop, which re-copies the accumulated rows on every iteration.
# rbind() for data frames drops zero-row pieces (patterns with no match).
ProcData <- do.call(rbind, matched)
# Convert a vector of damage-magnitude codes into powers of ten.
#
# code: character (or factor) vector of exponent codes.
# Returns a numeric vector of the same length:
#   K/k -> 3, M/m -> 6, B/b -> 9, a single digit -> that digit,
#   H/h -> 2 (taken to mean hundreds -- TODO confirm against the data
#   documentation), anything else (blank, NA, "+", "-", "?") -> 0 so the
#   damage figure is used as recorded.
#
# Unrecognised codes map to 0 rather than NA on purpose: an NA exponent turns
# the whole damage value into NA, and rows with NA totals are dropped by
# formula-based aggregate(), which silently discards, for example, all
# property damage on rows that have no crop-damage code.
exp_to_power <- function(code) {
  code <- toupper(as.character(code))
  power <- numeric(length(code))

  letter_power <- c(H = 2, K = 3, M = 6, B = 9)
  is_letter <- code %in% names(letter_power)
  power[is_letter] <- letter_power[code[is_letter]]

  is_digit <- grepl("^[0-9]$", code)
  power[is_digit] <- as.numeric(code[is_digit])

  power
}

# Scale the recorded damage figures to dollars and add the combined total.
ProcData$PROPDMG <- ProcData$PROPDMG * 10^exp_to_power(ProcData$PROPDMGEXP)
ProcData$CROPDMG <- ProcData$CROPDMG * 10^exp_to_power(ProcData$CROPDMGEXP)
ProcData$TOTAL_ECODMG <- ProcData$PROPDMG + ProcData$CROPDMG

# The exponent columns are no longer needed once the damages are scaled.
ProcData <- ProcData[, c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG",
                         "CROPDMG", "Names", "TOTAL_ECODMG")]
library(ggplot2)
# Total fatalities per official event category, largest first.
GroupFatalities <- aggregate(FATALITIES ~ Names, data = ProcData, FUN = sum)
GroupFatalities <- GroupFatalities[
  order(GroupFatalities$FATALITIES, decreasing = TRUE),
]

# head() returns at most ten rows; indexing with 1:10 would pad the table with
# all-NA rows whenever fewer than ten categories are present.
TopFatalities <- head(GroupFatalities, 10)
print(TopFatalities)
## Names FATALITIES
## 38 Tornado 5661
## 19 Heat 3138
## 11 Excessive Heat 1922
## 14 Flood 1525
## 13 Flash Flood 1035
## 28 Lightning 817
## 37 Thunderstorm Wind 753
## 33 Rip Current 577
## 12 Extreme cold/Wind Chill 382
## 23 High Wind 299
In terms of injuries we have:
# Total injuries per official event category, largest first.
GroupInjuries <- aggregate(INJURIES ~ Names, data = ProcData, FUN = sum)
GroupInjuries <- GroupInjuries[
  order(GroupInjuries$INJURIES, decreasing = TRUE),
]

# head() returns at most ten rows; indexing with 1:10 would pad the table with
# all-NA rows whenever fewer than ten categories are present.
TopInjuries <- head(GroupInjuries, 10)
print(TopInjuries)
## Names INJURIES
## 38 Tornado 91407
## 37 Thunderstorm Wind 9493
## 19 Heat 9224
## 14 Flood 8604
## 11 Excessive Heat 6525
## 28 Lightning 5232
## 25 Ice Storm 1992
## 13 Flash Flood 1802
## 23 High Wind 1523
## 18 Hail 1467
# Total economic damage (property + crop) per official event category,
# largest first.
TotalEcoDmg <- aggregate(TOTAL_ECODMG ~ Names, data = ProcData, FUN = sum)
TotalEcoDmg <- TotalEcoDmg[
  order(TotalEcoDmg$TOTAL_ECODMG, decreasing = TRUE),
]

# head() returns at most ten rows; indexing with 1:10 would pad the table with
# all-NA rows whenever fewer than ten categories are present.
TopEconDmg <- head(TotalEcoDmg, 10)
print(TopEconDmg)
## Names TOTAL_ECODMG
## 14 Flood 157764680787
## 24 Hurricane/Typhoon 44330000800
## 38 Tornado 18172843863
## 18 Hail 11681050140
## 13 Flash Flood 9224527227
## 37 Thunderstorm Wind 7098296330
## 25 Ice Storm 5925150850
## 44 Wildfire 3685468370
## 23 High Wind 3472442200
## 8 Drought 1886667000
# Horizontal bar chart of the event categories in TopEconDmg, with damage
# divided by 10^9 so the axis reads in billions.
# reorder() sorts the bars by damage; without it the categories are laid out
# alphabetically, which hides the ranking the chart is meant to show.
ggplot(TopEconDmg, aes(reorder(Names, TOTAL_ECODMG), TOTAL_ECODMG / 10^9)) +
  geom_bar(stat = "identity", color = "blue", fill = "lightblue") +
  coord_flip() +
  ggtitle("Top 10 Events with the Greatest Economic Damages") +
  labs(x = "Events", y = "Economic Damage ($billions)")
## Conclusions
We conclude that Tornado and Excessive Heat are the most harmful with respect to population health, while Flood and Hurricane had the greatest impact on the US economy.