Synopsis

The basic goal of this analysis is to explore the NOAA Storm Database and answer some basic questions about severe weather events. The data used for this analysis is an official publication of the National Oceanic and Atmospheric Administration (NOAA) which documents the occurrence of storms and other significant weather phenomena having sufficient intensity to cause loss of life, injuries, significant property damage, and/or disruption to commerce. This analysis is aimed at finding the types of weather events most harmful to population health and the types of events causing the greatest financial impact.

Data Preparation and Processing

Data import and review

Download the Storm data file “repdata%2Fdata%2FStormData.csv.bz2” into the local project folder and set that folder as the working directory.

# Read the compressed Storm Data CSV into a data frame. The file is looked up
# by its bare name, so it must already be in the current working directory.
# library() is used instead of require(): require() only returns FALSE when the
# package is missing, which would postpone the failure to the read_csv() call.
library(readr)
StormData <- read_csv("repdata%2Fdata%2FStormData.csv.bz2")
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   STATE__ = col_double(),
##   COUNTY = col_double(),
##   BGN_RANGE = col_double(),
##   COUNTY_END = col_double(),
##   END_RANGE = col_double(),
##   LENGTH = col_double(),
##   WIDTH = col_double(),
##   F = col_integer(),
##   MAG = col_double(),
##   FATALITIES = col_double(),
##   INJURIES = col_double(),
##   PROPDMG = col_double(),
##   CROPDMG = col_double(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   LATITUDE_E = col_double(),
##   LONGITUDE_ = col_double(),
##   REFNUM = col_double()
## )
## See spec(...) for full column specifications.
# Report the number of rows and columns that were read.
dim(StormData)
## [1] 902297     37

Data Preprocessing

For the purpose of this analysis we need data related to fatalities, injuries, and property damage. We take a subset of the Storm Data containing this information along with the event type and the financial loss incurred through property damage and crop damage.

# Keep only the columns used by the analysis: event type, casualty counts and
# the damage amounts together with their exponent codes.
analysis_columns <- c(
  'EVTYPE', 'FATALITIES', 'INJURIES',
  'PROPDMG', 'PROPDMGEXP', 'CROPDMG', 'CROPDMGEXP'
)
subset_data <- StormData[, analysis_columns]

# Retain the records that report at least one fatality, injury, property
# damage or crop damage. which() drops rows whose condition evaluates to NA,
# matching what subset() does.
has_impact <- with(
  subset_data,
  FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0
)
subset_data1 <- subset_data[which(has_impact), ]
dim(subset_data1)
## [1] 254633      7

Some event types are recorded under several different names. The steps below normalize them to a single spelling.

# Normalise EVTYPE so that spelling variants of one event type share a label.

# Drop records whose event type contains a literal "?".
subset_data1 <- subset_data1[!grepl("\\?", subset_data1$EVTYPE), ]

# Collapse plural / gerund forms onto the singular label.
subset_data1$EVTYPE <- gsub("WINDS", "WIND", subset_data1$EVTYPE)
subset_data1$EVTYPE <- gsub("FLOODS|FLOODING", "FLOOD", subset_data1$EVTYPE)
subset_data1$EVTYPE <- gsub("RIP CURRENTS", "RIP CURRENT", subset_data1$EVTYPE)

# Abbreviations and misspellings that are rewritten to "THUNDERSTORM".
# The alternation is assembled with paste() so that no line break or
# indentation can leak into the regular expression, and every variant is
# separated by "|". "THUNDERSTORM WIND" is deliberately not listed: it is the
# label the variants end up under (e.g. "TSTM WIND" -> "THUNDERSTORM WIND").
thunderstorm_variants <- paste(
  c("TSTM", "THUDERSTORM", "THUNDEERSTORM", "THUNDERSNOW",
    "THUNDERESTORM", "TUNDERSTORM", "THUNDERTORM", "THUNERSTORM"),
  collapse = "|"
)
subset_data1$EVTYPE <- gsub(thunderstorm_variants, "THUNDERSTORM",
                            subset_data1$EVTYPE)

subset_data1$EVTYPE <- gsub("FLOOD FLASH", "FLASH FLOOD", subset_data1$EVTYPE)

# NOTE(review): the spelling "FLOODFLODD" is kept as written; confirm which
# raw EVTYPE value it is meant to catch.
subset_data1$EVTYPE <- gsub("FLOODFLODD", "FLOOD", subset_data1$EVTYPE)

# fixed = TRUE matches the parentheses literally; as a regular expression
# "(G45)" is a capture group and would never match the text "(G45)".
# The suffix is removed so these records join the "THUNDERSTORM WIND" label.
subset_data1$EVTYPE <- gsub("THUNDERSTORM WIND (G45)", "THUNDERSTORM WIND",
                            subset_data1$EVTYPE, fixed = TRUE)

subset_data1$EVTYPE <- gsub("WINTRY", "WINTER", subset_data1$EVTYPE)
subset_data1$EVTYPE <- gsub("WINDHALL", "WIND", subset_data1$EVTYPE)
subset_data1$EVTYPE <- gsub("LAKEEFFECT", "LAKE EFFECT", subset_data1$EVTYPE)

The PROPDMGEXP and CROPDMGEXP columns encode the damage magnitude in several different formats. These values are converted to numeric multipliers.

## Property and crop damage column transformation.

# Convert damage-exponent codes into numeric multipliers.
#
# exp_code: vector of exponent codes (character, or coercible to character).
# Returns a numeric vector of the same length.
#
# Letter codes are matched case-insensitively: H = 1e2, K = 1e3, M = 1e6,
# B = 1e9. A digit n is read as 10^n. Empty, missing, symbolic ("-", "?", "+")
# and unrecognised codes fall back to 1, so the damage amount is kept as
# reported and never turned into NA (an NA would be silently dropped by the
# sums computed later with na.rm = TRUE).
damage_multiplier <- function(exp_code) {
  code <- toupper(as.character(exp_code))
  code[is.na(code)] <- ""
  known_codes <- c("", "-", "?", "+", "0", "1", "H", "2", "K", "3",
                   "4", "5", "M", "6", "7", "8", "B")
  multipliers <- c(1, 1, 1, 1, 1, 10, 100, 100, 1e3, 1e3,
                   1e4, 1e5, 1e6, 1e6, 1e7, 1e8, 1e9)
  result <- multipliers[match(code, known_codes)]
  result[is.na(result)] <- 1
  result
}

# Property damage: replace the exponent code by its multiplier, then express
# the damage in millions.
subset_data1$PROPDMGEXP <- damage_multiplier(subset_data1$PROPDMGEXP)
subset_data1$PROPDMG <- as.numeric(subset_data1$PROPDMG) *
  subset_data1$PROPDMGEXP / 1e6

# Crop damage: same conversion, also expressed in millions.
subset_data1$CROPDMGEXP <- damage_multiplier(subset_data1$CROPDMGEXP)
subset_data1$CROPDMG <- as.numeric(subset_data1$CROPDMG) *
  subset_data1$CROPDMGEXP / 1e6

Data Analysis

Load libraries required

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

1. Event Types having most harmful effect on population health

Human Fatalities

# Fatalities per event type, ranked from most to fewest.
# df0 supplies the rank column (V1 = 1..12) shown next to the top 12 rows.
df0 <- data.frame(V1 = 1:12)
data_fatal <- subset_data1 %>% group_by(EVTYPE)
data_Fatality_D <- data_fatal %>%
  summarize(Sum_Fatality = sum(FATALITIES, na.rm = TRUE)) %>%
  arrange(desc(Sum_Fatality)) %>%
  setNames(c("EVENT_TYPE", "Total_Fatality"))
# Attach the rank column to the 12 event types with the most fatalities.
data_Fatality_D1 <- cbind(df0, data_Fatality_D[seq_len(12), ])
data_Fatality_D1[seq_len(12), ]
##    V1        EVENT_TYPE Total_Fatality
## 1   1           TORNADO           5633
## 2   2    EXCESSIVE HEAT           1903
## 3   3       FLASH FLOOD            999
## 4   4              HEAT            937
## 5   5         LIGHTNING            816
## 6   6 THUNDERSTORM WIND            702
## 7   7             FLOOD            476
## 8   8       RIP CURRENT            368
## 9   9         HIGH WIND            283
## 10 10         AVALANCHE            224
## 11 11      WINTER STORM            206
## 12 12      RIP CURRENTS            204

The plot below shows the most dangerous weather events in terms of fatalities.

# Bar chart of total fatalities for the top 12 event types, one colour per
# event type, with the x-axis labels rotated so they do not overlap.
g <- ggplot(data_Fatality_D1, aes(EVENT_TYPE, Total_Fatality)) +
  geom_bar(aes(fill = EVENT_TYPE), stat = "identity") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event Types",
    y = "Number of Fatalities",
    title = " Top 12 Event Types With Most Fatalities in USA"
  )
g

Tornado is the most severe weather event in terms of human fatalities, followed by Excessive Heat.

Human Injuries

Human injuries due to most harmful event types

# Injuries per event type, ranked from most to fewest.
# df1 supplies the rank column (V1 = 1..12) shown next to the top 12 rows.
df1 <- data.frame(V1 = 1:12)
data_fatal <- subset_data1 %>% group_by(EVTYPE)
data_Injuries_D <- data_fatal %>%
  summarize(Sum_Injuries = sum(INJURIES, na.rm = TRUE)) %>%
  arrange(desc(Sum_Injuries)) %>%
  setNames(c("EVENT_TYPE", "Total_INJURIES"))
# Attach the rank column to the 12 event types with the most injuries.
data_Injuries_D1 <- cbind(df1, data_Injuries_D[seq_len(12), ])
data_Injuries_D1[seq_len(12), ]
##    V1        EVENT_TYPE Total_INJURIES
## 1   1           TORNADO          91346
## 2   2 THUNDERSTORM WIND           9353
## 3   3             FLOOD           6791
## 4   4    EXCESSIVE HEAT           6525
## 5   5         LIGHTNING           5230
## 6   6              HEAT           2100
## 7   7         ICE STORM           1975
## 8   8       FLASH FLOOD           1785
## 9   9         HIGH WIND           1439
## 10 10              HAIL           1361
## 11 11      WINTER STORM           1321
## 12 12 HURRICANE/TYPHOON           1275

The plot below shows the most dangerous weather events in terms of human injuries.

# Bar chart of total injuries for the top 12 event types, one colour per
# event type, with the x-axis labels rotated so they do not overlap.
# The y-axis is labelled as injuries because the plotted column is
# Total_INJURIES.
g <- ggplot(data_Injuries_D1, aes(EVENT_TYPE, Total_INJURIES)) +
  geom_bar(aes(fill = EVENT_TYPE), stat = "identity") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event Types",
    y = "Number of Injuries",
    title = "Top 12 Event Types With Most Human injuries in USA"
  )
g

Tornado is the most severe weather event in terms of human injuries.

2. Event Types which have the greatest economic consequences

Property Damage

# Property damage per event type, ranked from largest to smallest total.
# df_num supplies the rank column (V1 = 1..12) shown next to the top 12 rows.
df_num <- data.frame(V1 = 1:12)
data_subset2 <- subset_data1 %>% group_by(EVTYPE)
data_PROP_D <- data_subset2 %>%
  summarize(Sum_PROP_Damage = sum(PROPDMG, na.rm = TRUE)) %>%
  arrange(desc(Sum_PROP_Damage)) %>%
  setNames(c("EVENT_TYPE", "Total_PROP_Damage"))
# Attach the rank column to the 12 event types with the most property damage.
data_PROP_D1 <- cbind(df_num, data_PROP_D[seq_len(12), ])
data_PROP_D1[seq_len(12), ]
##    V1        EVENT_TYPE Total_PROP_Damage
## 1   1           TORNADO         51647.381
## 2   2             FLOOD         22272.556
## 3   3       FLASH FLOOD         16139.221
## 4   4              HAIL         13935.268
## 5   5 THUNDERSTORM WIND          9921.537
## 6   6         HURRICANE          6168.319
## 7   7         HIGH WIND          4578.370
## 8   8         ICE STORM          3944.928
## 9   9 HURRICANE/TYPHOON          3805.840
## 10 10          WILDFIRE          3725.114
## 11 11    TROPICAL STORM          2553.891
## 12 12      WINTER STORM          1688.497

The plot below shows the weather events with the greatest economic impact through property destruction.

# Bar chart of total property damage for the top 12 event types, one colour
# per event type, with the x-axis labels rotated so they do not overlap.
g <- ggplot(data_PROP_D1, aes(EVENT_TYPE, Total_PROP_Damage)) +
  geom_bar(aes(fill = EVENT_TYPE), stat = "identity") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event Types",
    y = "Property Damage in millions",
    title = "Top 12 Event Types Which caused Property damage in USA"
  )
g

Tornado is the most severe weather event in terms of property damage, followed by Floods.

Crop Damage

# Crop damage per event type, ranked from largest to smallest total.
# df_num1 supplies the rank column (V1 = 1..12) shown next to the top 12 rows.
df_num1 <- data.frame(V1 = 1:12)
subset_data2 <- subset_data1 %>% group_by(EVTYPE)
data_CROP_D <- subset_data2 %>%
  summarize(Sum_CROP_Damage = sum(CROPDMG, na.rm = TRUE)) %>%
  arrange(desc(Sum_CROP_Damage)) %>%
  setNames(c("EVENT_TYPE", "Total_CROP_Damage"))
# Attach the rank column to the 12 event types with the most crop damage.
data_CROP_D1 <- cbind(df_num1, data_CROP_D[seq_len(12), ])
data_CROP_D1[seq_len(12), ]
##    V1        EVENT_TYPE Total_CROP_Damage
## 1   1           DROUGHT        12487.5660
## 2   2             FLOOD         5670.8740
## 3   3              HAIL         3025.9545
## 4   4         HURRICANE         2741.9100
## 5   5       FLASH FLOOD         1436.4332
## 6   6      EXTREME COLD         1292.9730
## 7   7 THUNDERSTORM WIND         1159.5102
## 8   8 HURRICANE/TYPHOON         1112.9728
## 9   9      FROST/FREEZE         1094.0860
## 10 10        HEAVY RAIN          733.3998
## 11 11         HIGH WIND          679.2919
## 12 12    TROPICAL STORM          678.3460

Drought is the most severe weather event in terms of crop damage, followed by Floods.

Results

Tornado is the most severe weather event in terms of human fatalities, followed by Excessive Heat. Tornado is also the most severe weather event in terms of human injuries. Tornado is the most severe weather event in terms of property damage, followed by Floods. Drought is the most severe weather event in terms of crop damage, followed by Floods.