Abstract

Over the past half century, natural disasters have become increasingly frequent and severe in their effects on the population. From hurricanes to heavy rains, the damage to life and property has been significant. This study analyses the storm data made publicly available by the National Weather Service. The analysis seeks to summarise the effects of natural events on the lives of the population and the damage to property they bring about. Because a plethora of natural events disrupt the populace and the economy, only the ten events responsible for the greatest harm have been identified, studied and presented.

Data Processing

After making sure that the storm data file is present in the working directory, the read.csv function is used to read the compressed .csv file into the variable ‘storm’. With the data stored in ‘storm’, we proceed to Part One of the analysis, where the casualties resulting from natural events are studied.

knitr::opts_chunk$set(cache = TRUE, echo = TRUE)
# Read the bzip2-compressed storm data from the working directory. read.csv()
# already assumes a header row and comma-separated fields.
storm <- read.csv(file = "repdata-data-StormData.csv.bz2")

Part One - Casualties Data Processing

We start by copying selected variables - event, injuries and fatalities - from the variable ‘storm’ into another variable, ‘s_cas’. The data frame ‘s_cas’ is then summarised and sorted using the mutate, ddply and arrange functions of the dplyr and plyr packages. The resulting data frame is a table of natural events arranged in descending order of their casualty count.

knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
# plyr is attached BEFORE dplyr: both packages export mutate(), arrange() and
# desc(), and attaching plyr last would silently mask the dplyr versions.
library("plyr")
library("dplyr")

# Variables needed to answer the 1st question (storm casualties): one row per
# storm record with its event type, fatality count and injury count.
s_cas <- data.frame(
  Event = storm$EVTYPE,
  Fatalities = storm$FATALITIES,
  Injuries = storm$INJURIES
)

# Casualties per record = fatalities + injuries.
s_cas <- dplyr::mutate(s_cas, Casualities = Fatalities + Injuries)

# Collapse to one row per event type by summing every numeric column.
s_cas <- plyr::ddply(s_cas, "Event", plyr::numcolwise(sum))

# Most harmful event types first.
s_cas <- dplyr::arrange(s_cas, dplyr::desc(Casualities))

Part Two - Economic Damage Data Processing

The second part of the data processing is conducted to analyse the economic damages incurred as a result of natural events. The incurred costs are due to two main reasons: property damage and crop damage. For this part of the analysis, the column variables relating to property damage and crop damage are copied into the variable ‘s_econ’. Rows with zero economic damage are then deleted to reduce the size of the data set.

knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
# Variables needed to answer the 2nd question (storm economics): the event
# type plus the property/crop damage figures and their magnitude codes.
s_econ <- data.frame(storm$EVTYPE,
                     storm$PROPDMG,
                     storm$PROPDMGEXP,
                     storm$CROPDMG,
                     storm$CROPDMGEXP)
colnames(s_econ) <- c("Event",
                      "Property Damage",
                      "Property Damage-Magnitude",
                      "Crop Damage",
                      "Crop Damage-Magnitude")

# Keep only the rows that report a non-zero property or crop damage figure.
# The comparison is done on whole columns at once instead of calling apply()
# once per row, which would coerce the data frame to a matrix and run an R
# closure for every record.
has_damage <- s_econ[["Property Damage"]] != 0 | s_econ[["Crop Damage"]] != 0
s_econ <- s_econ[has_damage, ]

The next part of the processing replaces the letters in the magnitude columns with numbers, i.e. ‘h’ with 100, ‘K’ with 1000 and so on. To achieve this, each code is compared with the known letters (in either case) and replaced by the corresponding multiplier.

knitr::opts_chunk$set(echo = TRUE, cache = TRUE)

# Multiplier (kept as text, like the rest of the column) for each magnitude
# letter: hundred, thousand, million, billion.
magnitude_multipliers <- c(H = "100", K = "1000", M = "1e+06", B = "1e+09")

# Replace magnitude letters by their multiplier, e.g. "k" or "K" -> "1000".
#
# codes:   vector of magnitude codes (character or factor).
# returns: character vector of the same length. The letters H, K, M and B (in
#          either case) are replaced by the matching multiplier; every other
#          value (blank, digit, symbol, NA) is returned unchanged.
#
# The lookup works on the whole vector at once, so it also copes with empty
# input and NA codes, which a row-by-row `if` over 1:nrow() does not.
convert_magnitude <- function(codes) {
  codes <- as.character(codes)
  hit <- match(toupper(codes), names(magnitude_multipliers))
  known <- !is.na(hit)
  codes[known] <- unname(magnitude_multipliers[hit[known]])
  codes
}

# Convert the codes for property damages and for crop damages.
s_econ$`Property Damage-Magnitude` <-
  convert_magnitude(s_econ$`Property Damage-Magnitude`)
s_econ$`Crop Damage-Magnitude` <-
  convert_magnitude(s_econ$`Crop Damage-Magnitude`)

Further on, the resulting data frame s_econ is cleaned and sorted, screening out irrelevant information.

knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
# Turn the magnitude columns into numeric multipliers. Anything that is not a
# number (e.g. a blank code) becomes NA at this point.
s_econ$`Property Damage-Magnitude` <-
  as.numeric(s_econ$`Property Damage-Magnitude`)
s_econ$`Crop Damage-Magnitude` <-
  as.numeric(s_econ$`Crop Damage-Magnitude`)

# Drop the rows in which neither damage figure has a usable multiplier.
both_missing <- is.na(s_econ$`Property Damage-Magnitude`) &
  is.na(s_econ$`Crop Damage-Magnitude`)
s_econ <- s_econ[!both_missing, ]

# Dollar amount per record = reported figure * multiplier.
property_cost <- s_econ$`Property Damage` * s_econ$`Property Damage-Magnitude`
crop_cost <- s_econ$`Crop Damage` * s_econ$`Crop Damage-Magnitude`

# A record with a missing multiplier contributes nothing to the total. The
# substitution must happen BEFORE aggregating: sum() returns NA as soon as one
# element is NA, so replacing NA only after the per-event sums would wipe out
# the entire total of every event type that has a single such record.
property_cost[is.na(property_cost)] <- 0
crop_cost[is.na(crop_cost)] <- 0

# Keep the event type and the two cost columns, selected by name.
s_econ$Total_Property_Damage <- property_cost
s_econ$Total_Crop_Damage <- crop_cost
s_econ <- s_econ[c("Event", "Total_Property_Damage", "Total_Crop_Damage")]

# One row per event type, then the combined total, largest first.
s_econ <- ddply(s_econ, "Event", numcolwise(sum))
s_econ <- mutate(s_econ,
                 Total_Damages = Total_Property_Damage + Total_Crop_Damage)
s_econ <- arrange(s_econ, desc(Total_Damages))
# NOTE(review): figures quoted in the report text that were produced with the
# earlier NA handling need to be regenerated by re-knitting the document.

Results

After having processed, cleaned and sorted the data, the plots for each of the parts are shown below.

Part One - Casualties Result

knitr::opts_chunk$set(cache = TRUE, echo = TRUE)
# Top 10 most harmful events: take the first ten rows of s_cas, label them by
# event type and keep only the columns that form the stacked bars.
top_s_cas <- s_cas[seq_len(10), ]
rownames(top_s_cas) <- top_s_cas$Event
top_s_cas <- top_s_cas[setdiff(names(top_s_cas), c("Event", "Casualities"))]

# barplot() stacks the rows of a matrix within each bar, so transpose to get
# one column per event type.
count <- t(top_s_cas)

# Enlarged bottom margin for the vertical event labels.
par(mai = c(1.7, 1, 0.5, 0.5))
barplot(
  count,
  main = "Top 10 Most Harmful Natural Events",
  ylab = "Number of Casualities",
  legend = rownames(count),
  col = c("blue", "red"),
  las = 2,
  cex.names = 0.78,
  cex.axis = 0.8
)

# Summary figures: mean over all event types, and the 1st and 10th entries of
# the sorted casualty column.
c_min <- s_cas$Casualities[10]
c_max <- s_cas$Casualities[1]
c_mean <- mean(s_cas$Casualities)

From the barplot above it becomes evident that tornadoes have been the major cause of casualties since 1950, with about $9.6979 \times 10^{4}$ casualties up until 2011. Winter storms have resulted in the lowest number of casualties - 1527 - of the 10 analysed events between 1950 and 2011. On average, the number of casualties per type of natural event that took place between 1950 and 2011 is 158.0436548.

Part Two - Economic Damages Result

knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
# Top 10 economically most damaging events: take the first ten rows of s_econ,
# label them by event type and drop the columns that are not plotted.
top_s_econ <- s_econ[1:10, ]
rownames(top_s_econ) <- top_s_econ$Event
top_s_econ$Event <- NULL
top_s_econ$Total_Damages <- NULL

# barplot() stacks the rows of a matrix within each bar, so transpose to get
# one column per event type. (The transpose and the par() call are each done
# once; repeating them has no effect.)
counts <- t(top_s_econ)

# Enlarged bottom margin for the vertical event labels.
par(mai = c(1.7, 1, 0.5, 0.5))
barplot(counts,
        col = c("blue", "red"),
        las = 2,
        legend = rownames(counts),
        ylab = "Cost of Damages (in Dollars)",
        main = "Top 10 Natural Events with the Most \nEconomic Consequences",
        cex.axis = 0.68,
        cex.names = 0.55,
        cex.lab = 0.75)

# Summary figures: mean over all event types (ignoring NA), and the 1st and
# 10th entries of the sorted total-damage column.
e_mean <- mean(s_econ$Total_Damages, na.rm = TRUE)
e_max <- s_econ$Total_Damages[1]
e_min <- s_econ$Total_Damages[10]

The above barplot shows the economic losses incurred as a result of natural events that took place between 1950 and 2011. It becomes evident that damages incurred due to crop losses add up to only a small percentage of the total damages, with property damages being the principal contributor. With losses of about $6.930584 \times 10^{10}$ dollars, hurricanes/typhoons are the biggest contributor towards property damages. At $6.241 \times 10^{8}$ dollars in economic losses, wildfires were the least consequential of the top 10 natural events scrutinized. On average, losses due to natural events amounted to $3.1472975 \times 10^{8}$ dollars per event type for the defined time period.