The goal of this analysis is to explore the NOAA Storm Database and answer some basic questions about severe weather events. The data used for this analysis is an official publication of the National Oceanic and Atmospheric Administration (NOAA), which documents the occurrence of storms and other significant weather phenomena having sufficient intensity to cause loss of life, injuries, significant property damage, and/or disruption to commerce. This analysis aims to identify the types of weather events most harmful to population health and the types of events causing the greatest financial impact.
Download the Storm Data file “repdata%2Fdata%2FStormData.csv.bz2” into the local project folder and set that folder as the working directory.
# Load readr and read the compressed Storm Data CSV into a data frame.
# The file is expected to be in the current working directory; this script
# does not change the working directory itself.
# library() is used rather than require() so that a missing package stops the
# script here instead of returning FALSE and failing later at read_csv().
library(readr)
StormData <- read_csv("repdata%2Fdata%2FStormData.csv.bz2")
## Parsed with column specification:
## cols(
## .default = col_character(),
## STATE__ = col_double(),
## COUNTY = col_double(),
## BGN_RANGE = col_double(),
## COUNTY_END = col_double(),
## END_RANGE = col_double(),
## LENGTH = col_double(),
## WIDTH = col_double(),
## F = col_integer(),
## MAG = col_double(),
## FATALITIES = col_double(),
## INJURIES = col_double(),
## PROPDMG = col_double(),
## CROPDMG = col_double(),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## LATITUDE_E = col_double(),
## LONGITUDE_ = col_double(),
## REFNUM = col_double()
## )
## See spec(...) for full column specifications.
# Report the size of the raw data set as (rows, columns)
storm_dims <- dim(StormData)
storm_dims
## [1] 902297 37
For this analysis we need the data on fatalities, injuries, property damage and crop damage. We take a subset of the Storm Data containing this information along with the event type and the financial loss incurred in property damage and crop damage, keeping only the records that report at least one fatality, injury, or damage amount.
# Keep only the columns needed for the health and economic analysis
impact_cols <- c(
  'EVTYPE', 'FATALITIES', 'INJURIES',
  'PROPDMG', 'PROPDMGEXP', 'CROPDMG', 'CROPDMGEXP'
)
subset_data <- StormData[, impact_cols]

# Keep only events that caused at least one fatality, injury, or some
# property/crop damage. which() drops rows where the condition is NA, which is
# the same treatment subset() gives them.
has_impact <- subset_data$FATALITIES > 0 | subset_data$INJURIES > 0 |
  subset_data$PROPDMG > 0 | subset_data$CROPDMG > 0
subset_data1 <- subset_data[which(has_impact), ]
dim(subset_data1)
## [1] 254633 7
Some event types are recorded under several different names or spellings. These are normalised so that each event type is referred to by a single name.
# Normalise EVTYPE so spelling variants of the same event are grouped together.

# Drop records whose event type contains a question mark.
subset_data1 <- subset_data1[!grepl("\\?", subset_data1$EVTYPE), ]

# Abbreviations and misspellings of THUNDERSTORM. The pattern is assembled with
# paste() so that it contains no line break and every alternative is separated
# by "|"; a newline or any other character inside the string literal would
# become part of an alternative and stop it from ever matching.
# "THUNDERSTORM WIND" is deliberately not listed: it stays as the canonical
# label for thunderstorm wind events.
thunderstorm_variants <- paste(
  c("TSTM", "THUDERSTORM", "THUNDEERSTORM", "THUNDERSNOW", "THUNDERESTORM",
    "TUNDERSTORM", "THUNDERTORM", "THUNERSTORM"),
  collapse = "|"
)

evtype <- subset_data1$EVTYPE
evtype <- gsub("WINDS", "WIND", evtype)
evtype <- gsub("FLOODS|FLOODING", "FLOOD", evtype)
evtype <- gsub(thunderstorm_variants, "THUNDERSTORM", evtype)
evtype <- gsub("FLOOD FLASH", "FLASH FLOOD", evtype)
evtype <- gsub("FLOODFLODD", "FLOOD", evtype)
# Fold the "G45" gust annotations into the canonical label. The parentheses are
# escaped (and optional) so both "... WIND (G45)" and "... WIND G45" match;
# unescaped parentheses would be a regex group and never match a literal "(".
evtype <- gsub("THUNDERSTORM WIND *\\(?G45\\)?", "THUNDERSTORM WIND", evtype)
evtype <- gsub("WINTRY", "WINTER", evtype)
evtype <- gsub("WINDHALL", "WIND", evtype)
# Second pass, in case an earlier replacement re-created a plural/gerund form.
evtype <- gsub("FLOODS|FLOODING", "FLOOD", evtype)
evtype <- gsub("LAKEEFFECT", "LAKE EFFECT", evtype)
subset_data1$EVTYPE <- evtype
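The PROPDMGEXP and CROPDMGEXP columns record the magnitude of the damage amounts in several different formats. These values are converted to numeric multipliers, and the damage amounts are then expressed in millions.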
## Property and crop damage column transformation.

# Convert a vector of damage exponent codes into numeric multipliers.
#   '', '-', '?', '+', '0' (and NA)  -> 1
#   'h'/'H' -> 1e2, 'k'/'K' -> 1e3, 'm'/'M' -> 1e6, 'b'/'B' -> 1e9
#   digits '1'..'9'                   -> 10^digit
# Any other code yields NA so that unexpected values stay visible instead of
# being silently counted.
# One shared lookup is used for both damage columns so that every code
# (in particular 'B' = billions) is scaled identically for property and crops.
exp_to_multiplier <- function(exp_code) {
  code <- toupper(as.character(exp_code))
  multiplier <- rep(NA_real_, length(code))

  multiplier[is.na(code) | code %in% c("", "-", "?", "+", "0")] <- 1

  letter_scale <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  is_letter <- code %in% names(letter_scale)
  multiplier[is_letter] <- unname(letter_scale[code[is_letter]])

  is_digit <- code %in% as.character(1:9)
  multiplier[is_digit] <- 10^as.numeric(code[is_digit])

  multiplier
}

# Replace every missing value in the subset with 0 (a missing exponent code
# therefore becomes '0', i.e. a multiplier of 1).
subset_data1[is.na(subset_data1)] <- 0

## Property damage, expressed in millions
subset_data1$PROPDMGEXP <- exp_to_multiplier(subset_data1$PROPDMGEXP)
subset_data1$PROPDMG <- as.numeric(subset_data1$PROPDMG) *
  subset_data1$PROPDMGEXP / 1e6

## Crop damage, expressed in millions
subset_data1$CROPDMGEXP <- exp_to_multiplier(subset_data1$CROPDMGEXP)
subset_data1$CROPDMG <- as.numeric(subset_data1$CROPDMG) *
  subset_data1$CROPDMGEXP / 1e6
Load the required libraries.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Fatalities: total per event type, ranked from highest to lowest

# Rank column (1..12) shown next to the top-12 rows
df0 <- data.frame(V1 = 1:12)

data_fatal <- group_by(subset_data1, EVTYPE)
data_Fatality_D <- data_fatal %>%
  summarize(Total_Fatality = sum(FATALITIES, na.rm = TRUE)) %>%
  rename(EVENT_TYPE = EVTYPE) %>%
  arrange(desc(Total_Fatality))

# Attach the rank column to the 12 event types with the most fatalities
data_Fatality_D1 <- cbind(df0, data_Fatality_D[seq_len(12), ])
data_Fatality_D1
## V1 EVENT_TYPE Total_Fatality
## 1 1 TORNADO 5633
## 2 2 EXCESSIVE HEAT 1903
## 3 3 FLASH FLOOD 999
## 4 4 HEAT 937
## 5 5 LIGHTNING 816
## 6 6 THUNDERSTORM WIND 702
## 7 7 FLOOD 476
## 8 8 RIP CURRENT 368
## 9 9 HIGH WIND 283
## 10 10 AVALANCHE 224
## 11 11 WINTER STORM 206
## 12 12 RIP CURRENTS 204
The plot below shows the weather events responsible for the most fatalities.
# Bar chart of the 12 event types with the most fatalities.
# geom_col() draws bars whose heights are the values in the data, i.e. the
# same as geom_bar(stat = "identity").
g <- ggplot(data_Fatality_D1, aes(x = EVENT_TYPE, y = Total_Fatality)) +
  geom_col(aes(fill = EVENT_TYPE)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event Types",
    y = "Number of Fatalities",
    title = " Top 12 Event Types With Most Fatalities in USA"
  )
g
Tornadoes are the most severe weather event in terms of human fatalities, followed by excessive heat.
Human injuries due to most harmful event types
# Injuries: total per event type, ranked from highest to lowest

# Rank column (1..12) shown next to the top-12 rows
df1 <- data.frame(V1 = 1:12)

data_fatal <- group_by(subset_data1, EVTYPE)
data_Injuries_D <- data_fatal %>%
  summarize(Total_INJURIES = sum(INJURIES, na.rm = TRUE)) %>%
  rename(EVENT_TYPE = EVTYPE) %>%
  arrange(desc(Total_INJURIES))

# Attach the rank column to the 12 event types with the most injuries
data_Injuries_D1 <- cbind(df1, data_Injuries_D[seq_len(12), ])
data_Injuries_D1
## V1 EVENT_TYPE Total_INJURIES
## 1 1 TORNADO 91346
## 2 2 THUNDERSTORM WIND 9353
## 3 3 FLOOD 6791
## 4 4 EXCESSIVE HEAT 6525
## 5 5 LIGHTNING 5230
## 6 6 HEAT 2100
## 7 7 ICE STORM 1975
## 8 8 FLASH FLOOD 1785
## 9 9 HIGH WIND 1439
## 10 10 HAIL 1361
## 11 11 WINTER STORM 1321
## 12 12 HURRICANE/TYPHOON 1275
The plot below shows the weather events responsible for the most human injuries.
# Bar chart of the 12 event types with the most injuries.
# The y axis shows injury counts (Total_INJURIES), so it is labelled as
# injuries rather than fatalities.
g <- ggplot(data_Injuries_D1, aes(EVENT_TYPE, Total_INJURIES))
g <- g + geom_bar(aes(fill = EVENT_TYPE), stat = "identity")
g <- g + theme(axis.text.x = element_text(angle = 45, hjust = 1))
g <- g + labs(x = "Event Types", y = "Number of Injuries",
              title = "Top 12 Event Types With Most Human injuries in USA")
g
Tornadoes are the most severe weather event in terms of human injuries.
# Property damage: total per event type, ranked from highest to lowest

# Rank column (1..12) shown next to the top-12 rows
df_num <- data.frame(V1 = 1:12)

data_subset2 <- group_by(subset_data1, EVTYPE)
data_PROP_D <- data_subset2 %>%
  summarize(Total_PROP_Damage = sum(PROPDMG, na.rm = TRUE)) %>%
  rename(EVENT_TYPE = EVTYPE) %>%
  arrange(desc(Total_PROP_Damage))

# Attach the rank column to the 12 event types with the most property damage
data_PROP_D1 <- cbind(df_num, data_PROP_D[seq_len(12), ])
data_PROP_D1
## V1 EVENT_TYPE Total_PROP_Damage
## 1 1 TORNADO 51647.381
## 2 2 FLOOD 22272.556
## 3 3 FLASH FLOOD 16139.221
## 4 4 HAIL 13935.268
## 5 5 THUNDERSTORM WIND 9921.537
## 6 6 HURRICANE 6168.319
## 7 7 HIGH WIND 4578.370
## 8 8 ICE STORM 3944.928
## 9 9 HURRICANE/TYPHOON 3805.840
## 10 10 WILDFIRE 3725.114
## 11 11 TROPICAL STORM 2553.891
## 12 12 WINTER STORM 1688.497
The plot below shows the weather events with the greatest economic impact through property destruction.
# Bar chart of the 12 event types with the highest total property damage.
# Total_PROP_Damage is plotted as-is on the y axis; the axis label spelling
# is corrected ("millions").
g <- ggplot(data_PROP_D1, aes(EVENT_TYPE, Total_PROP_Damage))
g <- g + geom_bar(aes(fill = EVENT_TYPE), stat = "identity")
g <- g + theme(axis.text.x = element_text(angle = 45, hjust = 1))
g <- g + labs(x = "Event Types", y = "Property Damage in millions",
              title = "Top 12 Event Types Which caused Property damage in USA")
g
Tornadoes are the most severe weather event in terms of property damage, followed by floods.
# Crop damage: total per event type, ranked from highest to lowest

# Rank column (1..12) shown next to the top-12 rows
df_num1 <- data.frame(V1 = 1:12)

subset_data2 <- group_by(subset_data1, EVTYPE)
data_CROP_D <- subset_data2 %>%
  summarize(Total_CROP_Damage = sum(CROPDMG, na.rm = TRUE)) %>%
  rename(EVENT_TYPE = EVTYPE) %>%
  arrange(desc(Total_CROP_Damage))

# Attach the rank column to the 12 event types with the most crop damage
data_CROP_D1 <- cbind(df_num1, data_CROP_D[seq_len(12), ])
data_CROP_D1
## V1 EVENT_TYPE Total_CROP_Damage
## 1 1 DROUGHT 12487.5660
## 2 2 FLOOD 5670.8740
## 3 3 HAIL 3025.9545
## 4 4 HURRICANE 2741.9100
## 5 5 FLASH FLOOD 1436.4332
## 6 6 EXTREME COLD 1292.9730
## 7 7 THUNDERSTORM WIND 1159.5102
## 8 8 HURRICANE/TYPHOON 1112.9728
## 9 9 FROST/FREEZE 1094.0860
## 10 10 HEAVY RAIN 733.3998
## 11 11 HIGH WIND 679.2919
## 12 12 TROPICAL STORM 678.3460
Drought is the most severe weather event in terms of crop damage, followed by floods.
Tornadoes are the most severe weather event in terms of human fatalities, followed by excessive heat. Tornadoes are also the most severe weather event in terms of human injuries. Tornadoes are the most severe weather event in terms of property damage, followed by floods. Drought is the most severe weather event in terms of crop damage, followed by floods.