Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The objective of this analysis is to identify the storm events that cause the most harm to human life and to determine the economic consequences of the damage they cause to property and crops.
#libraries
library(knitr)
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Remote location of the storm data archive and the local file name to use
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
fil <- "StormData.csv.bz2"
dest <- paste0(getwd(), "/", fil)
# Fetch the archive only when it is not already in the working directory
if (!file.exists(fil)) {
  download.file(url, dest)
}
# Load the compressed CSV into a data frame; "?" and " " are read as NA
stormdf <- read.csv(
  fil,
  header = TRUE,
  blank.lines.skip = TRUE,
  strip.white = TRUE,
  na.strings = c("?", " ")
)
# Number of rows and columns in the raw storm data
dim(stormdf)
## [1] 902297 37
# Give the first column a shorter name
colnames(stormdf)[1] <- "ST"
# Keep the rows whose ST value is a whole number from 1 to 100, and only the
# columns needed for the casualty and damage analysis
sdf <- stormdf[
  as.numeric(as.character(stormdf$ST)) %in% 1:100,
  c(
    "EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
  )
]
str(sdf)
## 'data.frame': 902297 obs. of 7 variables:
## $ EVTYPE : Factor w/ 984 levels " HIGH SURF ADVISORY",..: 833 833 833 833 833 833 833 833 833 833 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 18 levels "","-","+","0",..: 16 16 16 16 16 16 16 16 16 16 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 8 levels "","0","2","B",..: 1 1 1 1 1 1 1 1 1 1 ...
# Drop rows whose event type contains "Summary" (any case) or is a literal "?"
sdf <- sdf[
  !grepl("Summary", sdf$EVTYPE, ignore.case = TRUE) & !(sdf$EVTYPE %in% "?"),
]
# Strip leading and trailing white space from the event type
sdf$EVTYPE <- trimws(sdf$EVTYPE)
# Load the event mapping file (columns EVTYPE, event, ecat) used to
# standardize the raw event types.
# `url` points at the storm data archive, not at this mapping file, and no
# download location for the mapping appears in this script. Downloading `url`
# here would save the storm archive under the name events.csv and silently
# corrupt the join that follows, so stop with a clear message when the file
# is missing instead.
fil <- "events.csv"
dest <- paste(getwd(), fil, sep = "/")
if (!file.exists(fil)) {
  stop(
    "Event mapping file not found: ", dest,
    ". Place events.csv in the working directory before running.",
    call. = FALSE
  )
}
edf <- read.csv(fil, header = TRUE, stringsAsFactors = FALSE)
# First rows of the event mapping file
head(edf)
## EVTYPE event ecat
## 1 ASTRONOMICAL LOW TIDE Astronomical Low Tide Z
## 2 AVALANCE Avalanche Z
## 3 AVALANCHE Avalanche C
## 4 BLIZZARD Blizzard Z
## 5 BLIZZARD AND EXTREME WIND CHIL Blizzard Z
## 6 APACHE COUNTY
# Attach the mapping file's columns to every record by matching on EVTYPE,
# then keep only the records that received a non-empty standardized event name
sdf <- left_join(sdf, edf, by = "EVTYPE")
sdf <- sdf[!is.na(sdf$event) & sdf$event != "", ]
# Count missing fatalities and injuries as zero, then add a combined total
sdf$FATALITIES <- replace(sdf$FATALITIES, is.na(sdf$FATALITIES), 0)
sdf$INJURIES <- replace(sdf$INJURIES, is.na(sdf$INJURIES), 0)
sdf$totfatinj <- sdf$FATALITIES + sdf$INJURIES
# Recode the damage exponent columns as powers of ten:
# B -> 9, M -> 6, K -> 3, H -> 2 (any case), and "-", "+", "?" -> 0
sdf[c("PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  sdf[c("PROPDMGEXP", "CROPDMGEXP")],
  function(code) {
    letter_power <- c(B = 9, M = 6, K = 3, H = 2)
    for (letter in names(letter_power)) {
      code <- gsub(letter, letter_power[[letter]], code, ignore.case = TRUE)
    }
    gsub("\\-|\\+|\\?", 0, code)
  }
)
# Dollar value = amount * 10^exponent; values that come out NA count as zero
sdf$propdmgdlr <- sdf$PROPDMG * 10^as.numeric(sdf$PROPDMGEXP)
sdf$propdmgdlr <- replace(sdf$propdmgdlr, is.na(sdf$propdmgdlr), 0)
sdf$cropdmgdlr <- sdf$CROPDMG * 10^as.numeric(sdf$CROPDMGEXP)
sdf$cropdmgdlr <- replace(sdf$cropdmgdlr, is.na(sdf$cropdmgdlr), 0)
# Combined property and crop damage per record
sdf$totdmg <- sdf$propdmgdlr + sdf$cropdmgdlr
# For each measure: sum per standardized event, sort the totals in decreasing
# order, keep the ten largest and give the columns readable names
fat <- local({
  totals <- ddply(sdf, .(event), summarize, f_sum = sum(as.numeric(FATALITIES)))
  ranked <- totals[order(totals$f_sum, decreasing = TRUE), ]
  setNames(head(ranked, 10), c("Events", "Total_fatalities"))
})
inj <- local({
  totals <- ddply(sdf, .(event), summarize, i_sum = sum(as.numeric(INJURIES)))
  ranked <- totals[order(totals$i_sum, decreasing = TRUE), ]
  setNames(head(ranked, 10), c("Events", "Total_injuries"))
})
fatinj <- local({
  totals <- ddply(sdf, .(event), summarize, fi_sum = sum(as.numeric(totfatinj)))
  ranked <- totals[order(totals$fi_sum, decreasing = TRUE), ]
  setNames(head(ranked, 10), c("Events", "Total_fatalities_&_Injuries"))
})
# Top 10 events by total fatalities
fat
## Events Total_fatalities
## 40 Tornado 5633
## 12 Excessive Heat 2241
## 14 Flash Flood 1069
## 20 Heat 937
## 30 Lightning 817
## 39 Thunderstorm Wind 713
## 34 Rip Current 577
## 15 Flood 482
## 38 Strong Wind 364
## 48 Winter Weather 336
# Top 10 events by total injuries
inj
## Events Total_injuries
## 40 Tornado 91364
## 39 Thunderstorm Wind 9525
## 12 Excessive Heat 7143
## 15 Flood 6794
## 30 Lightning 5231
## 48 Winter Weather 2401
## 27 Ice Storm 2170
## 20 Heat 2100
## 14 Flash Flood 1881
## 46 Wildfire 1608
# Top 10 events by combined fatalities and injuries
fatinj
## Events Total_fatalities_&_Injuries
## 40 Tornado 96997
## 39 Thunderstorm Wind 10238
## 12 Excessive Heat 9384
## 15 Flood 7276
## 30 Lightning 6048
## 20 Heat 3037
## 14 Flash Flood 2950
## 48 Winter Weather 2737
## 27 Ice Storm 2272
## 38 Strong Wind 1815
# One row of three bar charts, with wide top/bottom margins and vertical
# axis labels so the long event names fit
par(mfrow = c(1, 3), mar = c(8, 4, 8, 4), las = 3)
barplot(
  fat[["Total_fatalities"]],
  names.arg = fat[["Events"]],
  ylab = "Fatalities",
  main = strwrap("Top 10 events that caused most Fatalities", width = 30),
  col = "lavender"
)
barplot(
  inj[["Total_injuries"]],
  names.arg = inj[["Events"]],
  ylab = "Injuries",
  main = strwrap("Top 10 events that caused most Injuries", width = 30),
  col = "cornsilk"
)
barplot(
  fatinj[["Total_fatalities_&_Injuries"]],
  names.arg = fatinj[["Events"]],
  ylab = "Fatalities & Injuries",
  main = strwrap(
    "Top 10 events that caused most Fatalities & Injuries",
    width = 30
  ),
  col = "lightcyan"
)
# For each damage measure: sum the dollar values per standardized event (also
# expressed in billions), sort in decreasing order, keep the ten largest and
# give the columns readable names
prop <- local({
  totals <- ddply(
    sdf, .(event), summarize,
    p_sum = sum(as.numeric(propdmgdlr)),
    p_y = p_sum / (10^9)
  )
  ranked <- totals[order(totals$p_sum, decreasing = TRUE), ]
  setNames(
    head(ranked, 10),
    c("Events", "Total_Property_Damage", "Total_Property_Damage_in_Billions")
  )
})
crop <- local({
  totals <- ddply(
    sdf, .(event), summarize,
    c_sum = sum(as.numeric(cropdmgdlr)),
    c_y = c_sum / (10^9)
  )
  ranked <- totals[order(totals$c_sum, decreasing = TRUE), ]
  setNames(
    head(ranked, 10),
    c("Events", "Total_Crop_Damage", "Total_Crop_Damage_in_Billions")
  )
})
propcrop <- local({
  totals <- ddply(
    sdf, .(event), summarize,
    pc_sum = sum(as.numeric(totdmg)),
    pc_y = pc_sum / (10^9)
  )
  ranked <- totals[order(totals$pc_sum, decreasing = TRUE), ]
  setNames(
    head(ranked, 10),
    c(
      "Events",
      "Total_Property_&_Crop_Damage",
      "Total_Property_&_Crop_Damage_in_Billions"
    )
  )
})
# Top 10 events by total property damage
prop
## Events Total_Property_Damage
## 15 Flood 150114521500
## 25 Hurricane (Typhoon) 84765335010
## 40 Tornado 56952159374
## 37 Storm Surge/Tide 47964937000
## 14 Flash Flood 17675209849
## 19 Hail 15978555956
## 39 Thunderstorm Wind 9976898173
## 42 Tropical Storm 8924463100
## 46 Wildfire 8496633500
## 48 Winter Weather 6815806801
## Total_Property_Damage_in_Billions
## 15 150.114521
## 25 84.765335
## 40 56.952159
## 37 47.964937
## 14 17.675210
## 19 15.978556
## 39 9.976898
## 42 8.924463
## 46 8.496633
## 48 6.815807
# Top 10 events by total crop damage
crop
## Events Total_Crop_Damage
## 8 Drought 13972566000
## 15 Flood 10728387950
## 25 Hurricane (Typhoon) 5515302800
## 27 Ice Storm 5027113500
## 19 Hail 3026173620
## 17 Frost/Freeze 1997061000
## 14 Flash Flood 1541980250
## 11 Excessive Cold/Wind Chill 1330023000
## 39 Thunderstorm Wind 1253311530
## 21 Heavy Rain 1060555800
## Total_Crop_Damage_in_Billions
## 8 13.972566
## 15 10.728388
## 25 5.515303
## 27 5.027113
## 19 3.026174
## 17 1.997061
## 14 1.541980
## 11 1.330023
## 39 1.253312
## 21 1.060556
# Top 10 events by combined property and crop damage
propcrop
## Events Total_Property_&_Crop_Damage
## 15 Flood 160842909450
## 25 Hurricane (Typhoon) 90280637810
## 40 Tornado 57367120844
## 37 Storm Surge/Tide 47965792000
## 14 Flash Flood 19217190099
## 19 Hail 19004729576
## 8 Drought 15018672000
## 39 Thunderstorm Wind 11230209703
## 42 Tropical Storm 9637564100
## 27 Ice Storm 8999271560
## Total_Property_&_Crop_Damage_in_Billions
## 15 160.842909
## 25 90.280638
## 40 57.367121
## 37 47.965792
## 14 19.217190
## 19 19.004730
## 8 15.018672
## 39 11.230210
## 42 9.637564
## 27 8.999272
# One row of three bar charts of damage in billions of dollars, with wide
# top/bottom margins and vertical axis labels so the long event names fit
par(mfrow = c(1, 3), mar = c(8, 4, 8, 4), las = 3)
barplot(
  prop[["Total_Property_Damage_in_Billions"]],
  names.arg = prop[["Events"]],
  ylab = "Property Damages (in Billions)",
  main = strwrap("Top 10 events that caused most Property Damages", width = 30),
  col = "lavender"
)
barplot(
  crop[["Total_Crop_Damage_in_Billions"]],
  names.arg = crop[["Events"]],
  ylab = "Crop Damages (in Billions)",
  main = strwrap("Top 10 events that caused most Crop Damages", width = 30),
  col = "cornsilk"
)
barplot(
  propcrop[["Total_Property_&_Crop_Damage_in_Billions"]],
  names.arg = propcrop[["Events"]],
  ylab = "Property & Crop Damages (in Billions)",
  main = strwrap(
    "Top 10 events that caused most Property & Crop Damages",
    width = 30
  ),
  col = "mistyrose"
)
From the available storm data, it was determined that tornadoes caused the most fatalities, followed by excessive heat and flash floods.
Tornadoes also topped the list for injuries, followed by thunderstorm wind and excessive heat.
The overall report shows that Tornadoes followed by Thunderstorm wind and Excessive heat caused the most damage to life.
From the available storm data, it was determined that floods caused the most property damage, followed by hurricanes/typhoons and tornadoes.
The crops were mainly damaged by Drought followed by Flood and Hurricane/Typhoon.
The overall damage report shows that floods, followed by hurricanes/typhoons and tornadoes, had the greatest economic consequences.