Synopsis

This report explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database (1950-2011). We identify the six event types that had the most harmful effects on population health and also pinpoint the six event types leading to the greatest economic consequences. The reference data is the NOAA storm data, from which we use 7 variables (“EVTYPE”, “FATALITIES”, “INJURIES”, “PROPDMG”, “PROPDMGEXP”, “CROPDMG”, “CROPDMGEXP”) to analyze, conclude and present our investigation. Our final conclusion is that tornados are the most harmful event for population health and at the same time have the greatest economic consequences in the United States.

Data Processing

Loading libraries

library(ggplot2)
library(plyr)
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## Registered S3 method overwritten by 'R.oo':
##   method        from       
##   throw.default R.methodsS3
## R.oo v1.22.0 (2018-04-21) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, gc, load, save
## R.utils v2.9.0 successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, nullfile,
##     parse, warnings
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Download the original storm data from the URL, unzip it, and read the data from the CSV file.

# Fetch the compressed storm data only when no local copy of the archive
# exists, so repeated runs do not download the archive again.
if (!file.exists("dataset.csv.bz2")) {
  # mode = "wb" writes the archive in binary mode so it is not altered on
  # platforms that distinguish text from binary writes.
  download.file(
    url = 'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2',
    destfile = 'dataset.csv.bz2',
    mode = "wb"
  )
}
# Decompress next to the archive: remove = FALSE keeps the .bz2 file and
# skip = TRUE reuses an existing dataset.csv instead of decompressing again.
bunzip2("dataset.csv.bz2", "dataset.csv", remove = FALSE, skip = TRUE)
## [1] "dataset.csv"
## attr(,"temporary")
## [1] FALSE
# Load the whole table; every later step works on `dataset`.
dataset <- read.csv("dataset.csv")

Cleaning the data

Prepare data for question #1

Group the data by the variable “EVTYPE” and measure harm by total FATALITIES and by total INJURIES.

# Total fatalities per event type; aggregate() stores the sums in column `x`.
FATALITIES_data <- with(
  dataset,
  aggregate(FATALITIES, by = list(EVTYPE = EVTYPE), FUN = sum)
)
# Sort by descending total and keep the six largest.
FATALITIES_data_6 <- head(
  FATALITIES_data[order(-FATALITIES_data[["x"]]), ],
  n = 6L
)

# Same computation for injuries.
INJURIES_data <- with(
  dataset,
  aggregate(INJURIES, by = list(EVTYPE = EVTYPE), FUN = sum)
)
# Sort by descending total and keep the six largest.
INJURIES_data_6 <- head(
  INJURIES_data[order(-INJURIES_data[["x"]]), ],
  n = 6L
)

# Tag each table with the harm it measures, then stack both into one
# long-format table for plotting.
FATALITIES_data_6[["Type.of.Harmful"]] <- "FATALITIES"
INJURIES_data_6[["Type.of.Harmful"]] <- "INJURIES"
d <- rbind(FATALITIES_data_6, INJURIES_data_6)
print(d)
##              EVTYPE     x Type.of.Harmful
## 834         TORNADO  5633      FATALITIES
## 130  EXCESSIVE HEAT  1903      FATALITIES
## 153     FLASH FLOOD   978      FATALITIES
## 275            HEAT   937      FATALITIES
## 464       LIGHTNING   816      FATALITIES
## 856       TSTM WIND   504      FATALITIES
## 8341        TORNADO 91346        INJURIES
## 8561      TSTM WIND  6957        INJURIES
## 170           FLOOD  6789        INJURIES
## 1301 EXCESSIVE HEAT  6525        INJURIES
## 4641      LIGHTNING  5230        INJURIES
## 2751           HEAT  2100        INJURIES

Prepare data for question #2

# Work on a copy so the damage columns added below do not alter `dataset`.
dataset1 <- dataset

# Inspect the exponent codes that occur in the data.
unique(dataset1$PROPDMGEXP)
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
unique(dataset1$CROPDMGEXP)
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M

# Translate damage-exponent codes into numeric multipliers.
#
# exp_code: vector (factor or character) of PROPDMGEXP / CROPDMGEXP codes.
# Returns a numeric vector of the same length. Letters are matched without
# regard to case (h/H = 1e2, k/K = 1e3, m/M = 1e6, b/B = 1e9), a digit d
# in 1-8 means 10^d, and blank, "0", "-", "?", "+" or any unlisted code
# means 1 (the damage figure is used as recorded).
exp_to_multiplier <- function(exp_code) {
  codes <- c("", "-", "?", "+", "0", "1", "2", "3", "4", "5", "6", "7", "8",
             "H", "K", "M", "B")
  multipliers <- c(1, 1, 1, 1, 1, 10, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8,
                   1e2, 1e3, 1e6, 1e9)
  # Convert to character before the lookup: calling as.numeric() on a factor
  # returns its internal level codes, not the values the levels spell out.
  # match() is used instead of name indexing because "" cannot be matched
  # as a name.
  found <- multipliers[match(toupper(as.character(exp_code)), codes)]
  found[is.na(found)] <- 1
  found
}

tempPROPDMG <- exp_to_multiplier(dataset1$PROPDMGEXP)
tempCROPDMG <- exp_to_multiplier(dataset1$CROPDMGEXP)

# Normalize data: scale each recorded amount by its multiplier to get dollars.
# NOTE: totals captured in this document before this correction were computed
# from factor level codes and need to be regenerated.
dataset1$PROPTOTALDMG <- tempPROPDMG * dataset1$PROPDMG
dataset1$CROPTOTALDMG <- tempCROPDMG * dataset1$CROPDMG

# Calculate total damage (CROPTOTALDMG + PROPTOTALDMG)
dataset1$TOTALDMG <- dataset1$PROPTOTALDMG + dataset1$CROPTOTALDMG

# Sum PROPTOTALDMG per event type; aggregate() stores the sums in column `x`.
data1 <- aggregate(
  dataset1$PROPTOTALDMG,
  by = list(EVTYPE = dataset1$EVTYPE),
  FUN = sum
)
# Order by descending total and keep the top 6.
data1 <- head(data1[order(-data1$x), ], n = 6L)

# Sum CROPTOTALDMG per event type.
data2 <- aggregate(
  dataset1$CROPTOTALDMG,
  by = list(EVTYPE = dataset1$EVTYPE),
  FUN = sum
)
# Order by descending total and keep the top 6.
data2 <- head(data2[order(-data2$x), ], n = 6L)

# Sum TOTALDMG (CROPTOTALDMG + PROPTOTALDMG) per event type.
data3 <- aggregate(
  dataset1$TOTALDMG,
  by = list(EVTYPE = dataset1$EVTYPE),
  FUN = sum
)
# Order by descending total and keep the top 6.
data3 <- head(data3[order(-data3$x), ], n = 6L)

# Merging data: tag every table with the damage kind it holds. data1 was built
# from PROPTOTALDMG and data2 from CROPTOTALDMG, so the labels must follow that
# order (they were previously swapped).
data1$Type <- "PROPDMG"
data2$Type <- "CROPDMG"
data3$Type <- "TOTALDMG"
dd <- rbind(data1, data2, data3)
# NOTE: the table captured below this chunk predates the label correction;
# regenerate the document to refresh it.
print(dd)
##                 EVTYPE          x     Type
## 834            TORNADO 12994100.4  CROPDMG
## 153        FLASH FLOOD  5720614.8  CROPDMG
## 856          TSTM WIND  5353154.0  CROPDMG
## 170              FLOOD  3664305.5  CROPDMG
## 760  THUNDERSTORM WIND  3514795.8  CROPDMG
## 244               HAIL  2793732.7  CROPDMG
## 2441              HAIL  2320765.0  PROPDMG
## 1531       FLASH FLOOD   718045.2  PROPDMG
## 1701             FLOOD   677650.9  PROPDMG
## 8561         TSTM WIND   437255.7  PROPDMG
## 8341           TORNADO   399909.5  PROPDMG
## 7601 THUNDERSTORM WIND   267514.2  PROPDMG
## 8342           TORNADO 13394009.9 TOTALDMG
## 1532       FLASH FLOOD  6438660.0 TOTALDMG
## 8562         TSTM WIND  5790409.6 TOTALDMG
## 2442              HAIL  5114497.7 TOTALDMG
## 1702             FLOOD  4341956.4 TOTALDMG
## 7602 THUNDERSTORM WIND  3782310.0 TOTALDMG

Results

1- Across the United States, which types of events are most harmful with respect to population health?

Plot below shows the results of the analysis

# Bar chart for question 1: fatality and injury totals per event type,
# with the bar fill showing which kind of harm each segment counts.
p1 <- ggplot(
  data = d,
  mapping = aes(x = EVTYPE, y = x, fill = Type.of.Harmful)
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 6 harmful events by type of harmful",
    x = "Event Type",
    y = "Count"
  ) +
  # Rotate the event names so long labels stay readable.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0))
print(p1)

The plot reveals that tornados are the most harmful event for population health, followed by excessive heat.

2- Across the United States, which types of events have the greatest economic consequences?

Plot below shows the results of the analysis

# Bar chart for question 2: damage totals per event type, one bar per damage
# kind (Type).
# - The stray `grpop=type` argument was dropped: it was not a ggplot()
#   parameter and had no effect on the plot.
# - Bars are dodged rather than stacked: TOTALDMG already contains the
#   property and crop amounts, so stacking it on top of them would draw each
#   event at twice its real total.
p2 <- ggplot(dd, aes(x = EVTYPE, y = x)) +
  geom_bar(
    aes(colour = Type, fill = Type),
    stat = "identity",
    position = "dodge"
  ) +
  labs(x = "Event Type", y = "Count in $") +
  labs(title = "Top 6 economic consequences by type of events") +
  # Rotate the event names so long labels stay readable.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0))

print(p2)

Tornados are the most harmful event for population health and at the same time have the greatest economic consequences in the United States