Synopsis

The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events.

This analysis attempts to answer the following questions:

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
  2. Across the United States, which types of events have the greatest economic consequences?

Reading the data

Please set the working directory to match your own environment.

The code below reads the data from the CSV file. Later the data is converted into a table format so that
functions of the dplyr package can be used on it. Note that library(dplyr) is loaded first. You may choose any graphics package for the analysis; here ggplot2 has been used.

# Machine-specific path: the data file is read below by a relative name, so
# this must point at the folder that contains it on your machine.
setwd("C:/Working Directory/Office/Data Science/Coursera/Reproducible Research")
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.3
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.4.3
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(grid)

## Load the raw storm data set from the compressed CSV file ##

stormdata <- read.csv(file = "repdata_data_StormData.csv.bz2")
dim(stormdata)
names(stormdata)

## Keep only the columns used by the health and economic analysis ##

stormdata <- stormdata[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
names(stormdata)
head(stormdata)

Data Processing

Processing the data for population health damage

## 1. Impact on population health is measured by fatalities and injuries ##

# Total each measure per event type, then rank the event types from the
# most to the least harmful.
stormdata <- group_by(stormdata, EVTYPE)
fatalities <- summarise(stormdata, fatality = sum(FATALITIES))
injuries <- summarise(stormdata, injury = sum(INJURIES))
fatalities <- arrange(fatalities, desc(fatality))
injuries <- arrange(injuries, desc(injury))

head(fatalities, 15)
head(injuries, 15)

# ungroup() returns a new object instead of modifying its argument, so the
# result has to be assigned; a bare call only prints the table and leaves
# stormdata grouped.
stormdata <- ungroup(stormdata)

Processing the data for the economic damage

The columns PROPDMGEXP and CROPDMGEXP contain characters representing the multiplication factor (hundreds/thousands/millions/billions etc.), which are transformed in order to calculate the total value of the property and crop damage respectively. See the function ExponentValue, which returns the corresponding exponent; 10 is raised to this power in the later piece of code.

## List the distinct exponent codes found in the two damage columns ##
UniquePropExp <- unique(stormdata[["PROPDMGEXP"]])
UniquePropExp
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
UniqueCropExp <- unique(stormdata[["CROPDMGEXP"]])
UniqueCropExp
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M

Function to extract the exponent value

## Return the power of ten encoded by a damage exponent code ##
#
# value: one or more exponent codes, given as character, factor or numeric.
#
# Returns a numeric vector with one exponent per element of `value`:
#   "h"/"H" -> 2, "k"/"K" -> 3, "m"/"M" -> 6, "b"/"B" -> 9,
#   a code that parses as a number -> that number,
#   anything else ("", "-", "+", "?", NA, unknown text) -> 0.
ExponentValue <- function(value) {
  # Work on the labels: as.numeric() applied to a factor returns the internal
  # level index, not the digit that is printed, so the code "5" would
  # otherwise come back as whatever position "5" has among the levels.
  codes <- as.character(value)

  # Letter codes, matched case-insensitively; non-letter codes give NA here.
  letter_exponents <- c(H = 2, K = 3, M = 6, B = 9)
  exponent <- unname(letter_exponents[toupper(codes)])

  # Codes that parse as numbers already are exponents. The coercion warning
  # raised for the remaining symbols is expected, so only that call is muted.
  parsed <- suppressWarnings(as.numeric(codes))
  is_number <- is.na(exponent) & !is.na(parsed)
  exponent[is_number] <- parsed[is_number]

  # Everything else gets exponent 0 (a multiplier of 1), so the function
  # always returns a number instead of falling through and returning NULL.
  exponent[is.na(exponent)] <- 0
  exponent
}

# Group by event type so the summarise() calls below total damage per event.
stormdata <- group_by(stormdata, EVTYPE)

# Look up the exponent for every row. The codes are passed as character so
# that a factor column is read by its labels rather than its level indices,
# and vapply() enforces exactly one numeric exponent per row (sapply() would
# silently return a list if any lookup produced nothing).
propExp <- vapply(as.character(stormdata$PROPDMGEXP), FUN = ExponentValue,
                  FUN.VALUE = numeric(1), USE.NAMES = FALSE)
cropExp <- vapply(as.character(stormdata$CROPDMGEXP), FUN = ExponentValue,
                  FUN.VALUE = numeric(1), USE.NAMES = FALSE)

# Scale the recorded amounts to plain dollar values. The columns are
# overwritten in place, so running this step twice would scale them twice.
stormdata$PROPDMG <- stormdata$PROPDMG * (10^propExp)
stormdata$CROPDMG <- stormdata$CROPDMG * (10^cropExp)

# Total damage per event type, ranked from the largest total downwards.
cropdamage <- summarise(stormdata, cropdmg = sum(CROPDMG))
propdamage <- summarise(stormdata, propdmg = sum(PROPDMG))

cropdamage <- arrange(cropdamage, desc(cropdmg))
propdamage <- arrange(propdamage, desc(propdmg))

head(cropdamage)
head(propdamage)

Results

Tornadoes and excessive heat appear to have the greatest impact on population health.

Drought and flash floods appear to cause the greatest economic damage.

## Health impact charts: injuries in the top panel, fatalities below ##

# Horizontal bars for the 15 event types with the most injuries.
plot1 <- ggplot(
  data = head(injuries, 15),
  aes(x = reorder(EVTYPE, injury), y = injury)
) +
  geom_bar(stat = "identity", fill = "olivedrab") +
  coord_flip() +
  labs(
    x = "Event Type",
    y = "Total number of injuries",
    title = "Top 15 health impact of weather in United States"
  ) +
  theme(legend.position = "none")

# Same layout for the 15 event types with the most fatalities.
plot2 <- ggplot(
  data = head(fatalities, 15),
  aes(x = reorder(EVTYPE, fatality), y = fatality)
) +
  geom_bar(stat = "identity", fill = "red4") +
  coord_flip() +
  labs(x = "Event Type", y = "Total number of fatalities") +
  theme(legend.position = "none")

grid.arrange(plot1, plot2, nrow = 2)

## Plot for the economic impact ##

# Crop damage for the 15 costliest event types. The totals are drawn as
# log10(dollars); bars are still ordered by the untransformed totals.
plot1 <- ggplot(data = head(cropdamage, 15),
                aes(x = reorder(EVTYPE, cropdmg), y = log10(cropdmg))) +
  geom_bar(fill = "olivedrab", stat = "identity") + coord_flip() +
  # Axis label fixed: the closing parenthesis was missing.
  ylab("Crop Damage in dollars (logarithmic scale)") + xlab("Event Type") +
  ggtitle("Top 15 economic impact of weather conditions in US") +
  theme(legend.position = "none")

# Property damage for the 15 costliest event types, drawn the same way.
plot2 <- ggplot(data = head(propdamage, 15),
                aes(x = reorder(EVTYPE, propdmg), y = log10(propdmg))) +
  geom_bar(fill = "red4", stat = "identity") + coord_flip() +
  # Axis label fixed: the closing parenthesis was missing.
  ylab("Property Damage in Dollars (Logarithmic Scale)") + xlab("Event Type") +
  theme(legend.position = "none")

# Stack the two charts: crop damage on top, property damage below.
grid.arrange(plot1, plot2, nrow = 2)