Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database and analysing the events that cause the most damage to population and property.

Data Processing

Read the data from .csv.bz2 file

# Directory that holds the compressed NOAA storm data file. The full path is
# built with file.path() so the session's working directory is left untouched
# (no setwd()).
data_dir <- "D:\\Training\\Coursera\\Reproducible Research\\Week4"
data_file <- file.path(data_dir, "repdata_data_StormData.csv.bz2")

# Fail early with a clear message instead of a low-level connection error.
if (!file.exists(data_file)) {
  stop("Storm data file not found: ", data_file, call. = FALSE)
}

# bzfile() opens the bzip2-compressed CSV; the first row holds column names.
stormdatacomplete <- read.csv(bzfile(data_file), sep = ",", header = TRUE)

Retain only the relevant columns required for this analysis: Column 8: EVTYPE, Column 23: FATALITIES, Column 24: INJURIES, Column 25: PROPDMG, Column 26: PROPDMGEXP, Column 27: CROPDMG, Column 28: CROPDMGEXP.

# Keep only the event type plus the health and damage fields used below.
# Columns are selected by name (same order as positions 8, 23:28) so the
# subset stays correct even if the column layout of the input changes.
required_cols <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)

# Stop with an explicit message if the input lacks any expected field.
missing_cols <- setdiff(required_cols, names(stormdatacomplete))
if (length(missing_cols) > 0) {
  stop(
    "Storm data is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

storm_data <- stormdatacomplete[, required_cols]
head(storm_data)

# Analysis of the event that causes maximum damage with respect to population health

Aggregate injuries with respect to EVENT TYPE.

# Sum the INJURIES column within each EVTYPE group; the result has one row
# per event type with columns EVTYPE and INJURIES.
Injuriesdata <- aggregate(
  INJURIES ~ EVTYPE,
  data = storm_data,
  FUN = sum
)
head(Injuriesdata)

Aggregate fatalities with respect to EVENT TYPE.

# Sum the FATALITIES column within each EVTYPE group; the result has one row
# per event type with columns EVTYPE and FATALITIES.
Fatalitiesdata <- aggregate(
  FATALITIES ~ EVTYPE,
  data = storm_data,
  FUN = sum
)
head(Fatalitiesdata)

Combine both injuries and fatalities

# dplyr supplies arrange() and desc(). Attaching it masks stats::filter/lag
# and base::intersect/setdiff/setequal/union.
library(dplyr)

# merge() joins on the columns the two tables have in common, then the rows
# are ordered from the largest to the smallest combined casualty count.
HealthData <- merge(x = Injuriesdata, y = Fatalitiesdata) %>%
  arrange(desc(FATALITIES + INJURIES))
head(HealthData)

Add the number of Injuries and Fatalities in a new column and retain only the top 5 damage causing events

# Element-wise sum of two numeric vectors (kept as a named helper so the
# existing `addition` symbol remains available).
addition <- function(m, n) {
  m + n
}

# `+` is already vectorized, so the helper is applied to whole columns
# directly instead of once per row through mapply().
HealthData$TOTALHEALTHDMG <- addition(
  HealthData$INJURIES,
  HealthData$FATALITIES
)

# head() returns at most five rows; indexing with [1:5, ] would pad the
# result with all-NA rows when fewer than five event types are present.
HealthDataSubset <- head(HealthData, 5)
HealthDataSubset

Plot the graph of the EVENT TYPE vs Total number of instances of Health Damage

# Bar labels are the event types of the top-five subset.
eventType <- HealthDataSubset$EVTYPE

# Vertical bars (horiz = FALSE), one per event type, with bar height equal to
# the combined injuries and fatalities. las = 2 turns the axis labels
# perpendicular to the axis and cex.names = 0.5 shrinks the bar labels.
barplot(
  height = HealthDataSubset$TOTALHEALTHDMG,
  names.arg = eventType,
  main = "Top 5 Events causing Damage to Population ",
  xlab = "Event Type",
  ylab = "Total number of Injuries + Fatalities",
  col = "darkred",
  horiz = FALSE,
  las = 2,
  cex.names = 0.5
)

From the above graph it can be seen that tornadoes cause the most harm to population health.

# Analysis of the event that causes maximum damage to property

Convert the values given in columns PROPDMG/CROPDMG to actual amounts by multiplying them by the magnitude given in PROPDMGEXP/CROPDMGEXP.

# Convert a damage amount and its magnitude code into an absolute value.
#
# amount:   numeric vector of damage figures (PROPDMG / CROPDMG).
# exp_code: vector of magnitude codes (PROPDMGEXP / CROPDMGEXP), same length.
#
# Recognised codes are H (10^2), K (10^3), M (10^6) and B (10^9). Matching is
# case-insensitive, so lowercase codes such as "k" or "m" are scaled as well
# instead of being counted as zero. Any other code (blank, digits, symbols,
# NA) yields 0, exactly as rows that matched none of the four letters did
# before.
#
# Returns a numeric vector of the same length as `amount`.
scale_damage <- function(amount, exp_code) {
  multipliers <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)

  # match() returns NA for unrecognised or missing codes, so no NA ever
  # reaches a subscripted assignment.
  idx <- match(toupper(as.character(exp_code)), names(multipliers))
  known <- !is.na(idx)

  damage <- numeric(length(amount))
  damage[known] <- amount[known] * multipliers[idx[known]]
  damage
}

storm_data$PROPDAMAGE <- scale_damage(
  storm_data$PROPDMG,
  storm_data$PROPDMGEXP
)
storm_data$CROPDAMAGE <- scale_damage(
  storm_data$CROPDMG,
  storm_data$CROPDMGEXP
)

Aggregate Property Damage with respect to EVENT TYPE.

# Sum the scaled property damage within each EVTYPE group.
PropertyDmgdata <- aggregate(
  PROPDAMAGE ~ EVTYPE,
  data = storm_data,
  FUN = sum
)

Aggregate Crop Damage with respect to EVENT TYPE.

# Sum the scaled crop damage within each EVTYPE group.
CropDmgdata <- aggregate(
  CROPDAMAGE ~ EVTYPE,
  data = storm_data,
  FUN = sum
)

Combine both Property Damage and Crop Damage

# merge() joins the two per-event tables on the columns they share; rows are
# then ordered from the largest to the smallest combined damage.
EconomyData <- merge(x = PropertyDmgdata, y = CropDmgdata) %>%
  arrange(desc(PROPDAMAGE + CROPDAMAGE))
head(EconomyData)

Add the values of Property Damage and Crop Damage in a new column and retain only the top 5 damage causing events

# Element-wise sum of two numeric vectors, divided by 10^9 so the result is
# expressed in billions of the input unit.
additionEco <- function(m, n) {
  (m + n) / 10^9
}

# The arithmetic is vectorized, so the helper is applied to whole columns
# directly instead of once per row through mapply().
# NOTE: the column is named TOTALHEALTHDMG although it holds economic damage;
# the name is kept because the plotting code reads it.
EconomyData$TOTALHEALTHDMG <- additionEco(
  EconomyData$PROPDAMAGE,
  EconomyData$CROPDAMAGE
)

# head() returns at most five rows; indexing with [1:5, ] would pad the
# result with all-NA rows when fewer than five event types are present.
EconomyDataSubset <- head(EconomyData, 5)
EconomyDataSubset

Plot the graph of the EVENT TYPE vs Total Economic Damage

# Bar labels are the event types of the top-five subset.
eventType <- EconomyDataSubset$EVTYPE

# Vertical bars (horiz = FALSE), one per event type. The plotted column holds
# property + crop damage already divided by 10^9, so the y-axis label states
# the unit to keep the scale readable (assumes the source amounts are in US
# dollars -- confirm against the dataset documentation).
barplot(
  height = EconomyDataSubset$TOTALHEALTHDMG,
  names.arg = eventType,
  main = "Top 5 Events causing Damage to Economy ",
  xlab = "Event Type",
  ylab = "Total Property Damage + Crop Damage (billions of USD)",
  col = "darkred",
  horiz = FALSE,
  las = 2,
  cex.names = 0.5
)

From the above graph it can be seen that Floods cause maximum economic damage.

Results

  1. Across the United States, tornadoes are most harmful with respect to population health

  2. Across the United States, floods have the greatest economic consequences.