Exploratory Analysis of Weather Events Damage

Synopsis

Storm Data is an official publication of the National Oceanic and Atmospheric Administration (NOAA). It documents the occurrence of storms and other significant weather phenomena having sufficient intensity to cause loss of life, injuries, significant property damage, and/or disruption to commerce. NOAA receives Storm Data from the National Weather Service.

The distribution of both personal injuries and financial damage by event type is long-tailed, with only a handful of event types causing most of the damage (both material and non-material). One clear outlier is tornadoes, which cause over 37 percent of total deaths and 65 percent of total injuries. The leading cause of property damage is flood, with almost 40 percent of the total damage, and the leading cause of crop damage is drought (causing more than 28 percent of the total damage).

Data Processing

1. Loading libraries and Reading Data

2. Setting the locale

3. Reading and cleaning data

More Specifically:

3.1 Grouping EVTYPES

3.2 Sum deaths and injuries by event type, sort them in descending order, then remove the event types with no casualties.

3.3 Create variables that show the dollar amount of damage, sum them by event type, sort them, and then remove the event types with no damage.

1. Loading libraries and Reading Data

library(plyr)
library(poweRlaw)
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(ggplot2)
library(taRifx)
## Loading required package: reshape2

2. Setting the locale

## Switch the time, collation, character-type and monetary locale categories
## to "English". Each `## [1] ...` line below is the value echoed for the call
## directly above it in the original run.
## NOTE(review): the name "English" and the echoed "English_United States.1252"
## look Windows-specific -- confirm the locale name is accepted on the platform
## where this document is re-run.
Sys.setlocale("LC_TIME", "English")
## [1] "English_United States.1252"
Sys.setlocale("LC_COLLATE", "English")
## [1] "English_United States.1252"
Sys.setlocale("LC_CTYPE", "English")
## [1] "English_United States.1252"
Sys.setlocale("LC_MONETARY", "English")
## [1] "English_United States.1252"

3. Reading and cleaning data

## Read the bzip2-compressed storm data. The connection is closed in `finally`
## so it is released even when read.csv() stops with an error.
file <- bzfile("repdata_data_StormData.csv.bz2", open = "r")
data <- tryCatch(read.csv(file, stringsAsFactors = FALSE), finally = close(file))
## Grouping EVTYPES: every event type containing one of these keywords
## (case-sensitive match) is relabelled to the keyword itself. The keywords are
## applied in this order, so a label matching several keywords keeps the first.
for (keyword in c("WIND", "TORNADO", "HEAT", "SNOW", "FLOOD", "WINTER", "RIP")) {
    data$EVTYPE[grepl(keyword, data$EVTYPE)] <- keyword
}
## sum deaths and injuries by the event type
total_by_event <- ddply(data, .(EVTYPE), summarise, fatalities = sum(FATALITIES, 
    na.rm = TRUE), injuries = sum(INJURIES, na.rm = TRUE))
## sort by deaths and injuries in the descending order
sorted <- arrange(total_by_event, desc(fatalities), desc(injuries))
## remove events that made no damage (in terms of injuries or fatalities).
## An event type is kept when it has fatalities OR injuries; the previous `&`
## also dropped event types that had only one of the two, so their casualties
## were missing from the totals.
## NOTE: results echoed later in this document were rendered with the stricter
## filter and may change when the document is re-knit.
remove_no_damage <- sorted[(sorted$fatalities > 0 | sorted$injuries > 0), ]
## Create variables that show dollar amount of damage.
## The *DMGEXP columns hold a magnitude code that is looked up by name in Mults;
## any code not listed there yields NA and is counted as zero damage below.
Mults <- c(M = 10^6, m = 10^6, K = 10^3, k = 10^3, B = 10^9, b = 10^9)
data$sumP <- data$PROPDMG * Mults[data$PROPDMGEXP]
data$sumP[is.na(data$sumP)] <- 0
data$sumC <- data$CROPDMG * Mults[data$CROPDMGEXP]
data$sumC[is.na(data$sumC)] <- 0
## sum damage by event type and sorting
total_by_event2 <- ddply(data, .(EVTYPE), summarise, property = sum(sumP, na.rm = TRUE), 
    crops = sum(sumC, na.rm = TRUE))
sorted2 <- arrange(total_by_event2, desc(property), desc(crops))
## remove events that made no damage: keep an event type when it has property
## OR crop damage (same reasoning as for the casualty filter above)
remove_no_damage2 <- sorted2[(sorted2$property > 0 | sorted2$crops > 0), ]

Results

1. Death And Injuries Statistics

The distribution is long-tailed: the vast majority of the damage comes from only a few event types (tornado, heat, flood and wind).

We can also see that out of the 570 event types, only 65 had any injuries or deaths. The total number of deaths across all causes is 15,052.

Out of the events that did cause death, top event (tornado) is responsible for 37% of total deaths, and top five events for 83% of total deaths.

The statistics for injuries closely follow those for deaths: tornadoes cause 65% of all injuries and the top 5 events cause close to 90% of all injuries.

## Top 10 events by fatalities and injuries
## (rows are ordered by fatalities, with injuries as the tie-breaker)
head(sorted, 10)
##          EVTYPE fatalities injuries
## 1       TORNADO       5636    91407
## 2          HEAT       3138     9154
## 3         FLOOD       1523     8603
## 4          WIND       1446    11495
## 5     LIGHTNING        816     5230
## 6           RIP        577      529
## 7        WINTER        277     1876
## 8     AVALANCHE        224      170
## 9          SNOW        162     1118
## 10 EXTREME COLD        160      231
## total deaths over the retained event types
total_deaths <- sum(remove_no_damage$fatalities)
total_deaths
## [1] 15052
## share of deaths caused by tornado, as a fraction of 1 (row 1 is the largest
## because the table is sorted by fatalities)
top_death_percentage <- remove_no_damage$fatalities[1]/total_deaths
top_death_percentage
## [1] 0.3744
## share of deaths caused by the five deadliest event types
top_five_deaths <- sum(remove_no_damage$fatalities[1:5])
top_five_deaths_percentage <- top_five_deaths/total_deaths
top_five_deaths_percentage
## [1] 0.8344
## total injuries
total_injuries <- sum(remove_no_damage$injuries)
total_injuries
## [1] 140356
## The table is ordered by fatalities, not by injuries, so rank the injury
## counts on their own before taking the leading entries (in the top-10 table
## above WIND has more injuries than HEAT and FLOOD despite fewer deaths).
injuries_desc <- sort(remove_no_damage$injuries, decreasing = TRUE)
## share of injuries caused by the event type with the most injuries (tornado)
top_injury_percentage <- injuries_desc[1]/total_injuries
top_injury_percentage
## [1] 0.6513
## share of injuries caused by the five event types with the most injuries
top_five_injuries <- sum(injuries_desc[1:5])
top_five_injury_percentage <- top_five_injuries/total_injuries
top_five_injury_percentage
## [1] 0.8969

Create graph showing fatalities and injuries distribution

library(maptools)
## Two panels side by side: fatalities on the left, injuries on the right.
par(mfrow = c(1, 2))
event_names <- as.character(remove_no_damage$EVTYPE)

## Left panel: one point per event type; only event types with more than
## 1000 fatalities get a text label (all others are NA).
fatality_counts <- remove_no_damage$fatalities
fatality_labels <- ifelse(fatality_counts > 1000, event_names, NA)
plot(fatality_counts, col = "blue", pch = 16, ylab = "Fatalities")
pointLabel(x = 0, y = fatality_counts, labels = fatality_labels, allowSmallOverlap = FALSE, 
    cex = 0.7, pos = 4)

## Right panel: same layout for injuries, labelling event types above 7000.
injury_counts <- remove_no_damage$injuries
injury_labels <- ifelse(injury_counts > 7000, event_names, NA)
plot(injury_counts, col = "red", pch = 16, ylab = "Injuries")
pointLabel(x = 0, y = injury_counts, labels = injury_labels, allowSmallOverlap = FALSE, 
    cex = 0.7, pos = 4)

plot of chunk unnamed-chunk-18

2. Property and crop damage

The Data distribution is also long tailed. We can see that out of the 570 events, only 56 had any property or crop damage reported. Total property damage reported is around 420 billion dollars.

Out of the events that did cause property damage, top event (flood) is responsible for about 40% of total damage, and top five events for 84% of total damage.

Total crop damage has been just under 50 billion dollars, with top event causing 29% of the total damage, and top 5 events causing 76% of the total damage.

## Ten costliest event types (rows ordered by property damage, then crop damage)
head(sorted2, n = 10)
##               EVTYPE  property     crops
## 1              FLOOD 1.674e+11 1.224e+10
## 2  HURRICANE/TYPHOON 6.931e+10 2.608e+09
## 3            TORNADO 5.699e+10 4.150e+08
## 4        STORM SURGE 4.332e+10 5.000e+03
## 5               WIND 1.774e+10 2.159e+09
## 6               HAIL 1.573e+10 3.026e+09
## 7          HURRICANE 1.187e+10 2.742e+09
## 8     TROPICAL STORM 7.704e+09 6.783e+08
## 9             WINTER 6.717e+09 4.244e+07
## 10          WILDFIRE 4.765e+09 2.955e+08
## Total property damage over the retained event types
property_losses <- remove_no_damage2$property
total_property_damage <- sum(property_losses)
total_property_damage
## [1] 4.236e+11
## Share of property damage caused by the costliest event type (flood),
## expressed as a fraction of 1
top_event_property <- max(property_losses)/total_property_damage
top_event_property
## [1] 0.3951
## Share of property damage caused by the first five rows, i.e. the five
## costliest event types, since the table is sorted by property damage
top_five_property <- sum(property_losses[seq_len(5)])
top_five__property_percentage <- top_five_property/total_property_damage
top_five__property_percentage
## [1] 0.8374
## Total crop damage over the retained event types
crop_losses <- remove_no_damage2$crops
total_crop_damage <- sum(crop_losses)
total_crop_damage
## [1] 4.87e+10
## Share of crop damage caused by the costliest event type (drought)
top_event_crop <- max(crop_losses)/total_crop_damage
top_event_crop
## [1] 0.2869
## Share of crop damage caused by the top five event types; the table is
## re-sorted by crop damage first because it is ordered by property damage
sort_crop <- arrange(remove_no_damage2, desc(crops))
top_five_crops <- sum(sort_crop$crops[seq_len(5)])
top_five_crops_percentage <- top_five_crops/total_crop_damage
top_five_crops_percentage
## [1] 0.7598

Create graph showing property and crop damage distribution

library(maptools)
## Two panels side by side: property damage on the left, crop damage on the
## right.
par(mfrow = c(1, 2))

## Left panel: one point per event type in property-damage order; only event
## types above 1.7e+10 dollars get a text label (all others are NA).
## The axis label previously read "Propery Damage" (typo).
property_labels <- ifelse(remove_no_damage2$property > 1.7e+10, as.character(remove_no_damage2$EVTYPE), 
    NA)
plot(remove_no_damage2$property, col = "blue", pch = 16, ylab = "Property Damage")
pointLabel(x = 0, y = remove_no_damage2$property, labels = property_labels, allowSmallOverlap = FALSE, 
    cex = 0.7, pos = 4)

## Right panel: crop damage from the table re-sorted by crops, labelling event
## types above 2.9e+09 dollars.
crop_labels <- ifelse(sort_crop$crops > 2.9e+09, as.character(sort_crop$EVTYPE), NA)
plot(sort_crop$crops, col = "red", pch = 16, ylab = "Crop Damage")
pointLabel(x = 0, y = sort_crop$crops, labels = crop_labels, allowSmallOverlap = FALSE, 
    cex = 0.7, pos = 4)

plot of chunk unnamed-chunk-26