The purpose of this report is to determine the most harmful weather disasters for human health and economy. We do this by analyzing the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States. We particularly track the type of events and their injures and fatalities. We also track the cost of damage they did to properties and crops. The section data processing explains the code we did for our analysis and the results could be found in the results secions in the form of tables and figures.
1- loading the required libraries
library(ggplot2)
library(plyr)
library(dplyr)
library(grid)
library(reshape2)
2- Download, unzip and load the data
#Set the working directory where this code is located
setwd("E:/Courses/Reproducible Research/peer assignments/2")
if(!file.exists('StormData.csv.bz2'))
{
download.file(url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",destfile = "StormData.csv.bz2")
bzfile('StormData.csv.bz2')
}
stormData <- read.csv(bzfile('StormData.csv.bz2'),stringsAsFactors = FALSE)
3- Preprocessing to convert all events to lower case and group similar events in one event
#events names to lower case to avoid redundancy. Example snow is the same as SNOW
stormData$EVTYPE <- tolower(stormData$EVTYPE)
#group related events in one event
stormData$EVTYPE[grepl("snow",stormData$EVTYPE)] <- "snow"
stormData$EVTYPE[grepl("mud\\s*slide(s)?",stormData$EVTYPE)] <- "mudslide"
stormData$EVTYPE[grepl("high\\s*wind(s)?",stormData$EVTYPE)] <- "high wind"
stormData$EVTYPE[grepl("marine\\s*(high|strong)\\s*wind",stormData$EVTYPE)] <- "marine wind"
stormData$EVTYPE[grepl("hail",stormData$EVTYPE)] <- "hail"
stormData$EVTYPE[grepl("flood|flooding",stormData$EVTYPE)] <- "flood"
stormData$EVTYPE[grepl("hurricane|hurricanes",stormData$EVTYPE)] <- "hurricane"
stormData$EVTYPE[grepl("cold",stormData$EVTYPE)] <- "cold"
stormData$EVTYPE[grepl("heat",stormData$EVTYPE)] <- "heat"
stormData$EVTYPE[grepl("fire",stormData$EVTYPE)] <- "fire"
stormData$EVTYPE[grepl("dust",stormData$EVTYPE)] <- "dust"
stormData$EVTYPE[grepl("thunderstorm|thundertorm",stormData$EVTYPE)] <- "thunderstorm"
stormData$EVTYPE[grepl("tornado",stormData$EVTYPE)] <- "tornado"
stormData$EVTYPE[grepl("lightning|ligntning|lighting",stormData$EVTYPE)] <- "lightning"
stormData$EVTYPE[grepl("tstm\\s*(wind|winds){1}",stormData$EVTYPE)] <- "tstm wind"
stormData$EVTYPE[grepl("rip\\s*(current|currents){1}",stormData$EVTYPE)] <- "rip current"
stormData$EVTYPE[grepl("winter\\s*weather",stormData$EVTYPE)] <- "winter weather"
4- Analyzing events which are most harmful with respect to population health
eventsAftermath <- stormData[stormData$INJURIES >0 | stormData$FATALITIES > 0,c("STATE","EVTYPE","INJURIES","FATALITIES")]
eventsAftermath$casualty <- eventsAftermath$INJURIES + eventsAftermath$FATALITIES
eventsAftermath$EVTYPE <- as.character(eventsAftermath$EVTYPE)
eventsCasualty <- aggregate(cbind(INJURIES , FATALITIES , casualty) ~ EVTYPE,data=eventsAftermath,FUN = sum,na.rm = TRUE)
threshold <- mean(eventsCasualty$casualty)
#events with casualty greater than certain threshold
topeventsCasualty <- eventsCasualty[eventsCasualty$casualty > threshold,]
casualtyHist <- ggplot(data=topeventsCasualty, aes(x =factor(topeventsCasualty$EVTYPE) ,y = topeventsCasualty$casualty, fill = topeventsCasualty$EVTYPE))
casualtyHist <- casualtyHist + theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))
casualtyHist = casualtyHist + geom_bar(stat = "identity")
casualtyHist = casualtyHist + ggtitle("Top Event Casualties")
casualtyHist = casualtyHist + xlab("Event Type")
casualtyHist = casualtyHist + ylab("Number of Casualties")
casualtyHist <- casualtyHist + guides(fill=guide_legend(title="Event Type"))
print(casualtyHist)
topeventsCasualty[with(topeventsCasualty, order(-casualty)), c("EVTYPE","casualty")]
## EVTYPE casualty
## 78 tornado 97043
## 31 heat 12362
## 17 flood 10129
## 82 tstm wind 7484
## 56 lightning 6048
## 77 thunderstorm 2689
## 52 ice storm 2064
## 44 high wind 1774
## 16 fire 1698
threshold <- mean(eventsCasualty$INJURIES)
#events with casualty greater than certain threshold
topEventsInjuries <- eventsCasualty[eventsCasualty$INJURIES > threshold,]
InjuriesHist <- ggplot(data=topEventsInjuries, aes(x =factor(topEventsInjuries$EVTYPE) ,y = topEventsInjuries$INJURIES, fill = topEventsInjuries$EVTYPE))
InjuriesHist <- InjuriesHist + theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))
InjuriesHist = InjuriesHist + geom_bar(stat = "identity")
InjuriesHist = InjuriesHist + ggtitle("Top Event Injuries")
InjuriesHist = InjuriesHist + xlab("Event Type")
InjuriesHist = InjuriesHist + ylab("Number of Injuries")
InjuriesHist <- InjuriesHist + guides(fill=guide_legend(title="Event Type"))
print(InjuriesHist)
topEventsInjuries[with(topEventsInjuries, order(-INJURIES)), c("EVTYPE","INJURIES")]
## EVTYPE INJURIES
## 78 tornado 91407
## 31 heat 9224
## 17 flood 8604
## 82 tstm wind 6970
## 56 lightning 5231
## 77 thunderstorm 2478
## 52 ice storm 1975
## 16 fire 1608
## 44 high wind 1480
## 29 hail 1467
threshold <- mean(eventsCasualty$FATALITIES)
#events with casualty greater than certain threshold
topEventsFatalities <- eventsCasualty[eventsCasualty$FATALITIES > threshold,]
fatalitiesHist <- ggplot(data=topEventsFatalities, aes(x =factor(topEventsFatalities$EVTYPE) ,y = topEventsFatalities$FATALITIES, fill = topEventsFatalities$EVTYPE))
fatalitiesHist <- fatalitiesHist + theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))
fatalitiesHist = fatalitiesHist + geom_bar(stat = "identity")
fatalitiesHist = fatalitiesHist + ggtitle("Top Event Fatalities")
fatalitiesHist = fatalitiesHist + xlab("Event Type")
fatalitiesHist = fatalitiesHist + ylab("Number of Fatalities")
fatalitiesHist <- fatalitiesHist + guides(fill=guide_legend(title="Event Type"))
print(fatalitiesHist)
topEventsFatalities[with(topEventsFatalities, order(-FATALITIES)), c("EVTYPE","FATALITIES")]
## EVTYPE FATALITIES
## 78 tornado 5636
## 31 heat 3138
## 17 flood 1525
## 56 lightning 817
## 67 rip current 577
## 82 tstm wind 514
## 7 cold 436
## 44 high wind 294
## 2 avalanche 224
## 77 thunderstorm 211
## 94 winter storm 206
## 72 snow 169
orderedCasualties <- eventsCasualty[with(eventsCasualty, order(-FATALITIES, -INJURIES)), c("EVTYPE","FATALITIES", "INJURIES")]
topOrderedCasualties <- orderedCasualties[1:9,]
mEventsDamage <- melt(topOrderedCasualties, id=c("EVTYPE"))
ggplot(mEventsDamage, aes(EVTYPE, value, fill=variable)) +
geom_bar(position="dodge",stat="identity") +
xlab("Event Type") +
ylab("Casualty Value")+
theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))
topOrderedCasualties
## EVTYPE FATALITIES INJURIES
## 78 tornado 5636 91407
## 31 heat 3138 9224
## 17 flood 1525 8604
## 56 lightning 817 5231
## 67 rip current 577 529
## 82 tstm wind 514 6970
## 7 cold 436 316
## 44 high wind 294 1480
## 2 avalanche 224 170
5- Analyzing events with the greatest economic consequences + We will work with the variables PROPDMG and CROPDMG. They represent the crop damage and property damage. + The variables CROPDMGEXP and PROPDMGEXP represent the multiplier of the CROPDMG and PROPDMG. They have the values of h|H for 100,k|K for 1000, m|M for millions, b|B for billions. Other values like 0-9 are multiples of 10 and approximated to 10. Others are set to 1 + Choose events with PROPDMG or CROPDMG greater than 0.
eventsFinDamage <- stormData[stormData$PROPDMG >0 | stormData$CROPDMG > 0,c("EVTYPE","PROPDMG","PROPDMGEXP","CROPDMG","CROPDMGEXP")]
#compute property damage
eventsFinDamage$PROPMUL <- eventsFinDamage$PROPDMGEXP
eventsFinDamage$PROPMUL[grepl("[0-9]|\\+|\\-|\\?",eventsFinDamage$PROPMUL)] <- "10"
eventsFinDamage$PROPMUL[grepl("h|H",eventsFinDamage$PROPMUL)] <- "100"
eventsFinDamage$PROPMUL[grepl("k|K",eventsFinDamage$PROPMUL)] <- "1000"
eventsFinDamage$PROPMUL[grepl("m|M",eventsFinDamage$PROPMUL)] <- "1000000"
eventsFinDamage$PROPMUL[grepl("b|B",eventsFinDamage$PROPMUL)] <- "1000000000"
eventsFinDamage$PROPMUL <- as.numeric(eventsFinDamage$PROPMUL)
eventsFinDamage$PROPMUL[is.na(eventsFinDamage$PROPMUL)] <- 1
eventsFinDamage$PROPTOT <- eventsFinDamage$PROPDMG * eventsFinDamage$PROPMUL
#compute crop damage
eventsFinDamage$CROPMUL <- eventsFinDamage$CROPDMGEXP
eventsFinDamage$CROPMUL[grepl("0|\\?",eventsFinDamage$CROPMUL)] <- "10"
eventsFinDamage$CROPMUL[grepl("h|H",eventsFinDamage$CROPMUL)] <- "100"
eventsFinDamage$CROPMUL[grepl("k|K",eventsFinDamage$CROPMUL)] <- "1000"
eventsFinDamage$CROPMUL[grepl("m|M",eventsFinDamage$CROPMUL)] <- "1000000"
eventsFinDamage$CROPMUL[grepl("b|B",eventsFinDamage$CROPMUL)] <- "1000000000"
eventsFinDamage$CROPMUL <- as.numeric(eventsFinDamage$CROPMUL)
eventsFinDamage$CROPMUL[is.na(eventsFinDamage$CROPMUL)] <- 1
eventsFinDamage$CROPTOT <- eventsFinDamage$CROPDMG * eventsFinDamage$CROPMUL
eventsFinDamage$TOTDM <- eventsFinDamage$PROPTOT + eventsFinDamage$CROPTOT
eventsFinDamageTOT <- eventsFinDamage[,c("EVTYPE","PROPTOT","CROPTOT","TOTDM")]
eventsDmg <- aggregate(cbind(PROPTOT , CROPTOT , TOTDM) ~ EVTYPE,data=eventsFinDamageTOT,FUN = sum,na.rm = TRUE)
threshold <- mean(eventsDmg$TOTDM)
#events with casualty greater than certain threshold
totFinDmg <- eventsDmg[eventsDmg$TOTDM > threshold,]
totDmgHist <- ggplot(data=totFinDmg, aes(x =factor(totFinDmg$EVTYPE) ,y = totFinDmg$TOTDM, fill = totFinDmg$EVTYPE))
totDmgHist <- totDmgHist + theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))
totDmgHist = totDmgHist + geom_bar(stat = "identity")
totDmgHist = totDmgHist + ggtitle("Top Events Total Financial Damage")
totDmgHist = totDmgHist + xlab("Event Type")
totDmgHist = totDmgHist + ylab("Total Financial Damage")
totDmgHist <- totDmgHist + guides(fill=guide_legend(title="Event Type"))
print(totDmgHist)
#plot both crop and property total damage
orderedEventsDmg <- eventsDmg[with(eventsDmg, order(-PROPTOT, -CROPTOT)), c("EVTYPE","PROPTOT","CROPTOT")]
topOrderedEventsDmg <- orderedEventsDmg[1:9,]
mEventsFinDamage <- melt(topOrderedEventsDmg, id=c("EVTYPE"))
ggplot(mEventsFinDamage, aes(EVTYPE, value, fill=variable)) +
geom_bar(position="dodge",stat="identity") +
ggtitle("Top Events Financial Damage") +
xlab("Event Type") +
ylab("Damage Value") +
theme( axis.text.x=element_text(size=16, angle=40, vjust=.8, hjust=1.01))
topOrderedEventsDmg
## EVTYPE PROPTOT CROPTOT
## 29 flood 167518189417 12380089100
## 69 hurricane 84656180010 5505292800
## 113 tornado 56993100640 414962910
## 103 storm surge 43323536000 5000
## 49 hail 17619993958 3114213053
## 28 fire 8501628500 403281630
## 116 tropical storm 7703890550 678346000
## 142 winter storm 6688497260 26944000
## 110 thunderstorm 6431638935 652847108
Through our analysis we were able to detect weather events that are most harmful to human health. They could be found in the table below.
## EVTYPE FATALITIES INJURIES
## 78 tornado 5636 91407
## 31 heat 3138 9224
## 17 flood 1525 8604
## 56 lightning 817 5231
## 67 rip current 577 529
## 82 tstm wind 514 6970
## 7 cold 436 316
## 44 high wind 294 1480
## 2 avalanche 224 170
And the weather events that are most harmful to economy according to our analysis could be found below.
## EVTYPE PROPTOT CROPTOT
## 29 flood 167518189417 12380089100
## 69 hurricane 84656180010 5505292800
## 113 tornado 56993100640 414962910
## 103 storm surge 43323536000 5000
## 49 hail 17619993958 3114213053
## 28 fire 8501628500 403281630
## 116 tropical storm 7703890550 678346000
## 142 winter storm 6688497260 26944000
## 110 thunderstorm 6431638935 652847108