Synopsis

Weather events affect human health and have economic consequences. The National Oceanic and Atmospheric Administration (NOAA) has been gathering information about the impacts of extreme weather events. This is an analysis of the database provided by NOAA. The aim is to identify which event types are the most harmful with respect to population health, and which have the greatest economic consequences, across the United States. The analysis includes the following steps: getting the raw data, processing the database, analysing it, and showing the results, which are presented in a graph and in a table. All steps in this analysis are reproducible. The analysis finds that tornadoes and excessive heat are the two events most harmful to population health. With respect to economic consequences, floods and hurricanes/typhoons have the greatest impact.

Data Processing

Getting the data

# Fetch the compressed storm data set, skipping the download when a copy
# already exists in the working directory
if (!file.exists('StormData.csv.bz2')) {
  dataUrl <- 'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2'
  download.file(url = dataUrl, destfile = 'StormData.csv.bz2')
}
# Read the whole data set into a data frame directly from the .bz2 file
data <- read.csv(file = 'StormData.csv.bz2')

Standardization of the event type names: some of them are in uppercase and others in lowercase, and some of them contain the character “/”, which is replaced with a space

library(stringr)
# Normalise the event-type labels to title case so that spellings differing
# only in letter case end up in the same group when aggregated
data$EVTYPE <- str_to_title(data$EVTYPE)
# Replace every '/' with a space. gsub() is needed here: sub() rewrites only
# the first match, which leaves labels with several slashes partly converted.
# fixed = TRUE makes the pattern a literal character rather than a regex.
data$EVTYPE <- gsub('/', ' ', data$EVTYPE, fixed = TRUE)

Summarising the injuries and fatalities per event type and selecting the twenty most harmful weather events

# Sum injuries and fatalities separately for each event type
dataSum <- aggregate(
  cbind(INJURIES, FATALITIES) ~ EVTYPE,
  data = data,
  FUN = sum,
  na.rm = TRUE
)
# Sort by the combined casualty count, largest first, and keep the first
# twenty rows
dataHarmful <- dataSum[order(-(dataSum$INJURIES + dataSum$FATALITIES)), ][1:20, ]

Convert all characters of the PROPDMGEXP variable to uppercase, because some of them are in lowercase. Then scale the values of the PROPDMG variable according to the code in the PROPDMGEXP variable: “K” for thousands, “M” for millions, and “B” for billions

# Upper-case the exponent codes so that 'k', 'm' and 'b' are recognised too
data$PROPDMGEXP <- toupper(data$PROPDMGEXP)

# Scale PROPDMG in a single pass: each row's code is looked up in (K, M, B)
# and the value is multiplied by the matching factor. Any other code falls
# through to position 4, a factor of 1, which leaves the value as it is.
data$PROPDMG <- data$PROPDMG * c(1000, 1000000, 1000000000, 1)[
  match(data$PROPDMGEXP, c('K', 'M', 'B'), nomatch = 4)
]

Convert all characters of the CROPDMGEXP variable to uppercase, because some of them are in lowercase. Then scale the values of the CROPDMG variable according to the code in the CROPDMGEXP variable: “K” for thousands, “M” for millions, and “B” for billions

# Upper-case the exponent codes so that 'k', 'm' and 'b' are recognised too
data$CROPDMGEXP <- toupper(data$CROPDMGEXP)

# Scale CROPDMG in a single pass: each row's code is looked up in (K, M, B)
# and the value is multiplied by the matching factor. Any other code falls
# through to position 4, a factor of 1, which leaves the value as it is.
data$CROPDMG <- data$CROPDMG * c(1000, 1000000, 1000000000, 1)[
  match(data$CROPDMGEXP, c('K', 'M', 'B'), nomatch = 4)
]

Preparing the data for the analysis of the most harmful event types with respect to population health

library(reshape2)
# Reshape to long format: one row per (event type, measure) pair, with the
# first column (the event type) kept as the identifier
dataHarmfulPlot <- melt(dataHarmful, id.vars = 1)
# Title-case the measure names for use as legend entries
dataHarmfulPlot$variable <- str_to_title(dataHarmfulPlot$variable)
# Event types as a factor whose levels are ordered by their values, which
# fixes the order of the bars in the plot
dataHarmfulPlot$tipeEv <- with(dataHarmfulPlot, reorder(EVTYPE, value))

Preparing the data for the analysis of the event types with the greatest economic consequences

# Sum property damage and crop damage separately for each event type
dataSum <- aggregate(cbind(PROPDMG, CROPDMG) ~ EVTYPE, data = data, sum, na.rm = TRUE)
# Sort by combined damage, largest first, and keep the first twenty rows
dataEconomic <- dataSum[order(-(dataSum$PROPDMG + dataSum$CROPDMG)), ][1:20, ]
dataEconomic$Total <- dataEconomic$PROPDMG + dataEconomic$CROPDMG
# The columns are, in order: EVTYPE, PROPDMG, CROPDMG, Total. The display
# labels must follow that same order; they previously had the property and
# crop labels swapped, so each figure was shown under the wrong heading.
names(dataEconomic) <- c('Event type', 'Property damage USD', 'Crop damage USD', 'Total damage USD')

Results

The twenty event types most harmful with respect to population health

# Horizontal stacked bar chart of injuries and fatalities per event type
library(ggplot2)
library(scales)
plot1 <- ggplot(dataHarmfulPlot, aes(x = tipeEv)) +
  # weight = value makes each bar's length the sum of the values rather than
  # a count of rows; fill splits every bar by measure
  geom_bar(aes(weight = value, fill = variable)) +
  coord_flip() +
  # Labels are passed as named arguments, which is the documented interface
  # of labs(); wrapping them in list() depends on a legacy calling form
  labs(
    y = 'Frequency',
    x = 'Event type',
    title = 'The twenty weather most harmful events with respect\nto population health across the United States'
  ) +
  scale_y_continuous(labels = comma) +
  theme_bw() +
  scale_fill_manual(values = c('gray', 'dimgray'), name = 'Results of\nthe event')
plot1

The twenty event types with the greatest economic consequences

# Render the economic summary as an HTML table
library(xtable)
# One digits entry per column plus a leading one for the row names; zero
# decimals everywhere so the amounts print as whole numbers
dataEconomicT <- xtable(dataEconomic, digits = rep(0, 5))
# Row names are left out of the printed table
print(dataEconomicT, type = 'html', include.rownames = FALSE)
Event type Crop damage USD Property damage USD Total damage USD
Flood 144657709807 5661968450 150319678257
Hurricane Typhoon 69305840000 2607872800 71913712800
Tornado 56937160779 414953270 57352114049
Storm Surge 43323536000 5000 43323541000
Hail 15732267048 3025954473 18758221521
Flash Flood 16140812067 1421317100 17562129167
Drought 1046106000 13972566000 15018672000
Hurricane 11868319010 2741910000 14610229010
River Flood 5118945500 5029459000 10148404500
Ice Storm 3944927860 5022113500 8967041360
Tropical Storm 7703890550 678346000 8382236550
Winter Storm 6688497251 26944000 6715441251
High Wind 5270046295 638571300 5908617595
Wildfire 4765114000 295472800 5060586800
Tstm Wind 4484958495 554007350 5038965845
Storm Surge Tide 4641188000 850000 4642038000
Thunderstorm Wind 3483121284 414843050 3897964334
Hurricane Opal 3172846000 19000000 3191846000
Wild Forest Fire 3001829500 106796830 3108626330
Heavy Rain Severe Weather 2500000000 0 2500000000