0.1 Synopsis

The purpose of this report is to determine the most harmful weather disasters for human health and economy. We do this by analyzing the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States. We particularly track the type of events and their injures and fatalities. We also track the cost of damage they did to properties and crops. The section data processing explains the code we did for our analysis and the results could be found in the results secions in the form of tables and figures.

0.2 Data Processing

1- loading the required libraries

library(ggplot2)
library(plyr)
library(dplyr)
library(grid)
library(reshape2)

2- Download, unzip and load the data

#Set the working directory where this code is located
setwd("E:/Courses/Reproducible Research/peer assignments/2")
if(!file.exists('StormData.csv.bz2'))
{
  download.file(url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",destfile = "StormData.csv.bz2") 
  bzfile('StormData.csv.bz2')
}
stormData <- read.csv(bzfile('StormData.csv.bz2'),stringsAsFactors = FALSE)

3- Preprocessing to convert all events to lower case and group similar events in one event

#events names to lower case to avoid redundancy. Example snow is the same as SNOW
stormData$EVTYPE <- tolower(stormData$EVTYPE)
#group  related events in one event
stormData$EVTYPE[grepl("snow",stormData$EVTYPE)] <- "snow"
stormData$EVTYPE[grepl("mud\\s*slide(s)?",stormData$EVTYPE)] <- "mudslide"
stormData$EVTYPE[grepl("high\\s*wind(s)?",stormData$EVTYPE)] <- "high wind"
stormData$EVTYPE[grepl("marine\\s*(high|strong)\\s*wind",stormData$EVTYPE)] <- "marine wind"
stormData$EVTYPE[grepl("hail",stormData$EVTYPE)] <- "hail"
stormData$EVTYPE[grepl("flood|flooding",stormData$EVTYPE)] <- "flood"
stormData$EVTYPE[grepl("hurricane|hurricanes",stormData$EVTYPE)] <- "hurricane"
stormData$EVTYPE[grepl("cold",stormData$EVTYPE)] <- "cold"
stormData$EVTYPE[grepl("heat",stormData$EVTYPE)] <- "heat"
stormData$EVTYPE[grepl("fire",stormData$EVTYPE)] <- "fire"
stormData$EVTYPE[grepl("dust",stormData$EVTYPE)] <- "dust"
stormData$EVTYPE[grepl("thunderstorm|thundertorm",stormData$EVTYPE)] <- "thunderstorm"
stormData$EVTYPE[grepl("tornado",stormData$EVTYPE)] <- "tornado"
stormData$EVTYPE[grepl("lightning|ligntning|lighting",stormData$EVTYPE)] <- "lightning"
stormData$EVTYPE[grepl("tstm\\s*(wind|winds){1}",stormData$EVTYPE)] <- "tstm wind"
stormData$EVTYPE[grepl("rip\\s*(current|currents){1}",stormData$EVTYPE)] <- "rip current"
stormData$EVTYPE[grepl("winter\\s*weather",stormData$EVTYPE)] <- "winter weather"

4- Analyzing events which are most harmful with respect to population health

eventsAftermath <- stormData[stormData$INJURIES >0 | stormData$FATALITIES > 0,c("STATE","EVTYPE","INJURIES","FATALITIES")]
eventsAftermath$casualty <- eventsAftermath$INJURIES + eventsAftermath$FATALITIES
eventsAftermath$EVTYPE <- as.character(eventsAftermath$EVTYPE)
eventsCasualty <- aggregate(cbind(INJURIES , FATALITIES , casualty) ~ EVTYPE,data=eventsAftermath,FUN = sum,na.rm = TRUE)
threshold <- mean(eventsCasualty$casualty)
#events with casualty greater than certain threshold
topeventsCasualty <- eventsCasualty[eventsCasualty$casualty > threshold,]
casualtyHist <- ggplot(data=topeventsCasualty, aes(x =factor(topeventsCasualty$EVTYPE) ,y = topeventsCasualty$casualty, fill = topeventsCasualty$EVTYPE)) 
casualtyHist <- casualtyHist + theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))
casualtyHist = casualtyHist + geom_bar(stat = "identity") 
casualtyHist = casualtyHist + ggtitle("Top Event Casualties") 
casualtyHist = casualtyHist + xlab("Event Type")  
casualtyHist = casualtyHist + ylab("Number of Casualties")
casualtyHist <- casualtyHist + guides(fill=guide_legend(title="Event Type"))
print(casualtyHist)

topeventsCasualty[with(topeventsCasualty, order(-casualty)), c("EVTYPE","casualty")]
##          EVTYPE casualty
## 78      tornado    97043
## 31         heat    12362
## 17        flood    10129
## 82    tstm wind     7484
## 56    lightning     6048
## 77 thunderstorm     2689
## 52    ice storm     2064
## 44    high wind     1774
## 16         fire     1698
threshold <- mean(eventsCasualty$INJURIES)
#events with casualty greater than certain threshold
topEventsInjuries <- eventsCasualty[eventsCasualty$INJURIES > threshold,]
InjuriesHist <- ggplot(data=topEventsInjuries, aes(x =factor(topEventsInjuries$EVTYPE) ,y = topEventsInjuries$INJURIES, fill = topEventsInjuries$EVTYPE)) 
InjuriesHist <- InjuriesHist + theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))
InjuriesHist = InjuriesHist + geom_bar(stat = "identity") 
InjuriesHist = InjuriesHist + ggtitle("Top Event Injuries") 
InjuriesHist = InjuriesHist + xlab("Event Type")  
InjuriesHist = InjuriesHist + ylab("Number of Injuries")
InjuriesHist <- InjuriesHist + guides(fill=guide_legend(title="Event Type"))
print(InjuriesHist)

topEventsInjuries[with(topEventsInjuries, order(-INJURIES)), c("EVTYPE","INJURIES")]
##          EVTYPE INJURIES
## 78      tornado    91407
## 31         heat     9224
## 17        flood     8604
## 82    tstm wind     6970
## 56    lightning     5231
## 77 thunderstorm     2478
## 52    ice storm     1975
## 16         fire     1608
## 44    high wind     1480
## 29         hail     1467
threshold <- mean(eventsCasualty$FATALITIES)
#events with casualty greater than certain threshold
topEventsFatalities <- eventsCasualty[eventsCasualty$FATALITIES > threshold,]
fatalitiesHist <- ggplot(data=topEventsFatalities, aes(x =factor(topEventsFatalities$EVTYPE) ,y = topEventsFatalities$FATALITIES, fill = topEventsFatalities$EVTYPE)) 
fatalitiesHist <- fatalitiesHist + theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))
fatalitiesHist = fatalitiesHist + geom_bar(stat = "identity") 
fatalitiesHist = fatalitiesHist + ggtitle("Top Event Fatalities") 
fatalitiesHist = fatalitiesHist + xlab("Event Type")  
fatalitiesHist = fatalitiesHist + ylab("Number of Fatalities")
fatalitiesHist <- fatalitiesHist + guides(fill=guide_legend(title="Event Type"))
print(fatalitiesHist)

topEventsFatalities[with(topEventsFatalities, order(-FATALITIES)), c("EVTYPE","FATALITIES")]
##          EVTYPE FATALITIES
## 78      tornado       5636
## 31         heat       3138
## 17        flood       1525
## 56    lightning        817
## 67  rip current        577
## 82    tstm wind        514
## 7          cold        436
## 44    high wind        294
## 2     avalanche        224
## 77 thunderstorm        211
## 94 winter storm        206
## 72         snow        169
orderedCasualties <- eventsCasualty[with(eventsCasualty, order(-FATALITIES, -INJURIES)), c("EVTYPE","FATALITIES", "INJURIES")]
topOrderedCasualties <- orderedCasualties[1:9,]
mEventsDamage <- melt(topOrderedCasualties, id=c("EVTYPE"))
ggplot(mEventsDamage, aes(EVTYPE, value, fill=variable)) + 
  geom_bar(position="dodge",stat="identity") + 
  xlab("Event Type") +
  ylab("Casualty Value")+
  theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))

topOrderedCasualties
##         EVTYPE FATALITIES INJURIES
## 78     tornado       5636    91407
## 31        heat       3138     9224
## 17       flood       1525     8604
## 56   lightning        817     5231
## 67 rip current        577      529
## 82   tstm wind        514     6970
## 7         cold        436      316
## 44   high wind        294     1480
## 2    avalanche        224      170

5- Analyzing events with the greatest economic consequences + We will work with the variables PROPDMG and CROPDMG. They represent the crop damage and property damage. + The variables CROPDMGEXP and PROPDMGEXP represent the multiplier of the CROPDMG and PROPDMG. They have the values of h|H for 100,k|K for 1000, m|M for millions, b|B for billions. Other values like 0-9 are multiples of 10 and approximated to 10. Others are set to 1 + Choose events with PROPDMG or CROPDMG greater than 0.

eventsFinDamage <- stormData[stormData$PROPDMG >0 | stormData$CROPDMG > 0,c("EVTYPE","PROPDMG","PROPDMGEXP","CROPDMG","CROPDMGEXP")]
#compute property damage
eventsFinDamage$PROPMUL <- eventsFinDamage$PROPDMGEXP
eventsFinDamage$PROPMUL[grepl("[0-9]|\\+|\\-|\\?",eventsFinDamage$PROPMUL)] <- "10"
eventsFinDamage$PROPMUL[grepl("h|H",eventsFinDamage$PROPMUL)] <- "100"
eventsFinDamage$PROPMUL[grepl("k|K",eventsFinDamage$PROPMUL)] <- "1000"
eventsFinDamage$PROPMUL[grepl("m|M",eventsFinDamage$PROPMUL)] <- "1000000"
eventsFinDamage$PROPMUL[grepl("b|B",eventsFinDamage$PROPMUL)] <- "1000000000"
eventsFinDamage$PROPMUL <- as.numeric(eventsFinDamage$PROPMUL)
eventsFinDamage$PROPMUL[is.na(eventsFinDamage$PROPMUL)] <- 1
eventsFinDamage$PROPTOT <- eventsFinDamage$PROPDMG * eventsFinDamage$PROPMUL
#compute crop damage
eventsFinDamage$CROPMUL <- eventsFinDamage$CROPDMGEXP
eventsFinDamage$CROPMUL[grepl("0|\\?",eventsFinDamage$CROPMUL)] <- "10"
eventsFinDamage$CROPMUL[grepl("h|H",eventsFinDamage$CROPMUL)] <- "100"
eventsFinDamage$CROPMUL[grepl("k|K",eventsFinDamage$CROPMUL)] <- "1000"
eventsFinDamage$CROPMUL[grepl("m|M",eventsFinDamage$CROPMUL)] <- "1000000"
eventsFinDamage$CROPMUL[grepl("b|B",eventsFinDamage$CROPMUL)] <- "1000000000"
eventsFinDamage$CROPMUL <- as.numeric(eventsFinDamage$CROPMUL)
eventsFinDamage$CROPMUL[is.na(eventsFinDamage$CROPMUL)] <- 1
eventsFinDamage$CROPTOT <- eventsFinDamage$CROPDMG * eventsFinDamage$CROPMUL
eventsFinDamage$TOTDM <- eventsFinDamage$PROPTOT + eventsFinDamage$CROPTOT

eventsFinDamageTOT <- eventsFinDamage[,c("EVTYPE","PROPTOT","CROPTOT","TOTDM")]
eventsDmg <- aggregate(cbind(PROPTOT , CROPTOT , TOTDM) ~ EVTYPE,data=eventsFinDamageTOT,FUN = sum,na.rm = TRUE)
threshold <- mean(eventsDmg$TOTDM)
#events with casualty greater than certain threshold
totFinDmg <- eventsDmg[eventsDmg$TOTDM > threshold,]
totDmgHist <- ggplot(data=totFinDmg, aes(x =factor(totFinDmg$EVTYPE) ,y = totFinDmg$TOTDM, fill = totFinDmg$EVTYPE)) 
totDmgHist <- totDmgHist + theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))
totDmgHist = totDmgHist + geom_bar(stat = "identity") 
totDmgHist = totDmgHist + ggtitle("Top Events Total Financial Damage") 
totDmgHist = totDmgHist + xlab("Event Type")  
totDmgHist = totDmgHist + ylab("Total Financial Damage")
totDmgHist <- totDmgHist + guides(fill=guide_legend(title="Event Type"))
print(totDmgHist)

#plot both crop and property total damage
orderedEventsDmg <- eventsDmg[with(eventsDmg, order(-PROPTOT, -CROPTOT)), c("EVTYPE","PROPTOT","CROPTOT")]
topOrderedEventsDmg <- orderedEventsDmg[1:9,]
mEventsFinDamage <- melt(topOrderedEventsDmg, id=c("EVTYPE"))
ggplot(mEventsFinDamage, aes(EVTYPE, value, fill=variable)) + 
  geom_bar(position="dodge",stat="identity") +
  ggtitle("Top Events Financial Damage") +
  xlab("Event Type") +
  ylab("Damage Value") +
  theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))

topOrderedEventsDmg
##             EVTYPE      PROPTOT     CROPTOT
## 29           flood 167518189417 12380089100
## 69       hurricane  84656180010  5505292800
## 113        tornado  56993100640   414962910
## 103    storm surge  43323536000        5000
## 49            hail  17619993958  3114213053
## 28            fire   8501628500   403281630
## 116 tropical storm   7703890550   678346000
## 142   winter storm   6688497260    26944000
## 110   thunderstorm   6431638935   652847108

0.3 Results

Through our analysis we were able to detect weather events that are most harmful to human health. They could be found in the table below.

##         EVTYPE FATALITIES INJURIES
## 78     tornado       5636    91407
## 31        heat       3138     9224
## 17       flood       1525     8604
## 56   lightning        817     5231
## 67 rip current        577      529
## 82   tstm wind        514     6970
## 7         cold        436      316
## 44   high wind        294     1480
## 2    avalanche        224      170

And the weather events that are most harmful to economy according to our analysis could be found below.

##             EVTYPE      PROPTOT     CROPTOT
## 29           flood 167518189417 12380089100
## 69       hurricane  84656180010  5505292800
## 113        tornado  56993100640   414962910
## 103    storm surge  43323536000        5000
## 49            hail  17619993958  3114213053
## 28            fire   8501628500   403281630
## 116 tropical storm   7703890550   678346000
## 142   winter storm   6688497260    26944000
## 110   thunderstorm   6431638935   652847108