NOAA STORM DATA ANALYSIS

Synopsis

For the following analysis we use the NOAA storm data from 1950 to 2011. From this dataset we will look at the number of injuries and fatalities, as well as the financial damage incurred, due to the various natural events that occurred.

Data Processing

The data comes from the National Weather Service (https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2). It is unzipped using the bunzip2 function and then loaded into R.

library(R.utils)
# Fetch the compressed NOAA storm data only when neither the archive nor the
# extracted CSV is already on disk, so re-running the analysis does not hit
# the network again.
data_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists("storm_data.csv.bz2") && !file.exists("storm_data.csv")) {
  # Use R's default download method rather than requiring an external curl
  # binary; mode = "wb" keeps the binary archive intact on Windows.
  download.file(data_url, "storm_data.csv.bz2", mode = "wb")
}

# bunzip2() stops with an error when the destination file already exists.
# skip = TRUE makes the step safe to repeat, and remove = FALSE keeps the
# archive so later runs need not download it again.
if (file.exists("storm_data.csv.bz2")) {
  bunzip2(
    "storm_data.csv.bz2",
    destname = "storm_data.csv",
    skip = TRUE,
    remove = FALSE
  )
}

# Load the extracted CSV; the first row holds the column names.
storm_data <- read.csv("storm_data.csv", header = TRUE)

Data Exploration

We will now explore the data and plot some figures to give us an overview of the data as a whole and the factors governing it. From the dataset we will first use the fatalities and injuries with respect to events, followed by the damage to property and crops by event, and plot them to see the basic trend.
We will now select the columns which are required for this.

library(dplyr)
# Keep only the state, event type, casualty and damage columns that the rest
# of the analysis works with.
new_storm_data <- storm_data %>%
  select(
    "STATE", "EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
  )

Number of Fatalities by events

Figuring out all the events in the dataset

# Collect the distinct event type labels and report how many there are.
unique_events <- unique(new_storm_data[["EVTYPE"]])
length(unique_events)
## [1] 985

As we can see, there are 985 unique events in the dataset, so it makes sense to look only at the top 6 events ranked by their total number of fatalities.

# Total fatalities per event type (the sum lands in column `x`), ordered from
# the deadliest event type downwards.
fatalities <- aggregate(
  new_storm_data$FATALITIES,
  by = list(EVTYPE = new_storm_data$EVTYPE),
  FUN = sum
) %>%
  arrange(desc(x))

# Show the six event types with the most fatalities.
head(fatalities, 6)
##           EVTYPE    x
## 1        TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3    FLASH FLOOD  978
## 4           HEAT  937
## 5      LIGHTNING  816
## 6      TSTM WIND  504

Plotting for the first 6 highest total fatalities

library(ggplot2)
# Bar chart of total fatalities for the first six rows of the sorted table.
ggplot(fatalities[seq_len(6), ], aes(x = EVTYPE, y = x)) +
  geom_col() +
  labs(x = "EVENT TYPE", y = "FATALITIES")

Number of injuries by events

We will follow the same procedure as we did for fatalities

# Total injuries per event type (the sum lands in column `x`), ordered from
# the most injurious event type downwards.
injuries <- aggregate(
  new_storm_data$INJURIES,
  by = list(EVTYPE = new_storm_data$EVTYPE),
  FUN = sum
) %>%
  arrange(desc(x))

# Show the six event types with the most injuries.
head(injuries, 6)
##           EVTYPE     x
## 1        TORNADO 91346
## 2      TSTM WIND  6957
## 3          FLOOD  6789
## 4 EXCESSIVE HEAT  6525
## 5      LIGHTNING  5230
## 6           HEAT  2100

Plotting for injuries by events

# Bar chart of total injuries for the first six rows of the sorted table.
ggplot(injuries[seq_len(6), ], aes(x = EVTYPE, y = x)) +
  geom_col() +
  labs(x = "Event type", y = "injuries")

Damage incurred

In the dataset, property damage is recorded together with an exponent code, i.e. the amount is the PROPDMG value multiplied by 10^x, where x is given by PROPDMGEXP.

# List every distinct exponent code used for property damage.
unique(new_storm_data[["PROPDMGEXP"]])
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"

From this we can conclude that the exponent codes stand for powers of ten: “B” is billion, i.e. an exponent of 9. Similarly, “M” is million (6), “K” is thousand (3) and “H” is hundred (2).
For the total economic damage we will take the sum of property damage and crop damage.
We will first convert all the symbols to upper case and replace the symbols that have no numeric equivalent (empty, “+”, “?”, “-”) with “0”.

# Upper-case both exponent code columns so that e.g. "m" and "M" are treated
# as the same code.
new_storm_data[c("PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  new_storm_data[c("PROPDMGEXP", "CROPDMGEXP")],
  toupper
)

# Working table for the damage calculation: event type plus the property and
# crop damage amounts and their exponent codes.
damage <- new_storm_data %>%
  select("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")

# Recode the exponent symbols of both columns into exponent digits:
#   "", "+", "?", "-" -> "0"
#   "B" -> "9", "M" -> "6", "K" -> "3", "H" -> "2"
# Codes that are already digits are left untouched.
damage[c("PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  damage[c("PROPDMGEXP", "CROPDMGEXP")],
  function(codes) {
    codes[codes %in% c("", "+", "?", "-")] <- "0"
    codes[codes %in% "B"] <- "9"
    codes[codes %in% "M"] <- "6"
    codes[codes %in% "K"] <- "3"
    codes[codes %in% "H"] <- "2"
    codes
  }
)

Now getting the full damage by multiplying with the exponential component

# Turn each exponent digit into its power-of-ten multiplier (overwriting the
# exponent column in place) and scale the raw amounts into the PROPERTY and
# CROPS columns. mutate() evaluates its arguments in order, so each product
# uses the multiplier computed just before it.
damage <- damage %>%
  mutate(
    PROPDMGEXP = 10^(as.numeric(PROPDMGEXP)),
    PROPERTY = PROPDMG * PROPDMGEXP,
    CROPDMGEXP = 10^(as.numeric(CROPDMGEXP)),
    CROPS = CROPDMG * CROPDMGEXP
  )

Now we will calculate the total sum for property and crop damage respectively based on events

# Sum the property damage per event type; the total lands in column `x`.
prop_damage <- aggregate(
  damage$PROPERTY,
  by = list(EVTYPE = damage$EVTYPE),
  FUN = sum
)

# Sum the crop damage per event type; the total lands in column `x`.
crop_damage <- aggregate(
  damage$CROPS,
  by = list(EVTYPE = damage$EVTYPE),
  FUN = sum
)

Property damage

# Order event types by total property damage, largest first, and show the
# top six.
prop_damage <- prop_damage %>%
  arrange(desc(x))
head(prop_damage, 6)
##              EVTYPE            x
## 1             FLOOD 144657709807
## 2 HURRICANE/TYPHOON  69305840000
## 3           TORNADO  56947380677
## 4       STORM SURGE  43323536000
## 5       FLASH FLOOD  16822673979
## 6              HAIL  15735267513

Crop Damage

# Order event types by total crop damage, largest first, and show the top six.
crop_damage <- crop_damage %>%
  arrange(desc(x))
head(crop_damage, 6)
##        EVTYPE           x
## 1     DROUGHT 13972566000
## 2       FLOOD  5661968450
## 3 RIVER FLOOD  5029459000
## 4   ICE STORM  5022113500
## 5        HAIL  3025954473
## 6   HURRICANE  2741910000

From the above 2 data frames we can conclude that the most damage in property is done due to Floods and for crops it is by Drought.
Now for the total damage we need to sum the property damage and crop damage

 # Per-record total damage: property plus crop amounts, each scaled by its
# power-of-ten multiplier.
new_total <- damage %>%
  mutate(TOTALDMG = PROPDMG * PROPDMGEXP + CROPDMG * CROPDMGEXP)

# Add up the totals per event type (the sum lands in column `x`), order them
# from the costliest event type downwards and show the top five.
event_type <- with(
  new_total,
  aggregate(TOTALDMG, by = list(EVTYPE = EVTYPE), FUN = sum)
) %>%
  arrange(desc(x))
head(event_type, 5)
##              EVTYPE            x
## 1             FLOOD 150319678257
## 2 HURRICANE/TYPHOON  71913712800
## 3           TORNADO  57362333947
## 4       STORM SURGE  43323541000
## 5              HAIL  18761221986

Plotting total damages

# Bar chart of total damage for the first five rows of the sorted table, with
# one fill colour per event type.
ggplot(event_type[seq_len(5), ], aes(x = EVTYPE, y = x, fill = EVTYPE)) +
  geom_col() +
  labs(x = "Events", y = "Damage(in dollars)")

Results

  1. Tornadoes are the most harmful events for population health, as they have the highest number of fatalities and injuries.
  2. Floods have the greatest economic consequences, as they have the highest combined amount of property and crop damage.