An analysis of Storm Events Data based on NOAA reports

SYNOPSIS

This analysis explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm data, focusing on the event types that: 1. Are the most harmful to people - based on the number of fatalities and injuries 2. Cause the most economic damage - based on financial estimates of damage to property and crops.

Key results: * With respect to population health, the most dangerous events were tornadoes - 65% of all reported injuries and 37% of all fatalities. * Considering property damage, the most damaging events were floods, with total costs amounting to over 144 billion dollars. * Considering crop damage, the most damaging events were droughts, with total costs amounting to almost 14 billion dollars.

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

DATA PROCESSING

Loading the data and subsetting the necessary variables

# Load the raw storm events file (expected in the working directory).
# read.csv() already defaults to a comma separator and a header row.
storm <- read.csv("repdata-data-StormData.csv")
# Convert to a tibble; as_tibble() replaces tbl_df(), which is deprecated in
# current dplyr releases.
storm <- as_tibble(storm)
# Keep only the columns used below: event type, casualty counts, and the
# damage amounts together with their magnitude codes.
storm <- select(storm, EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

Processing Health related data: 1. Counting sums of injuries by type of storm event and saving in separate table ‘type_ing’ 2. Counting sums of fatalities by type of storm event and saving in separate table ‘type_fat’ 3. Combining two tables as ‘people_harm’ 4. Counting percentages of injuries and fatalities for each event type

# 1. HARM TO POPULATION HEALTH ---------------------------------------------

# Total injuries per event type, largest total first
type_ing <- storm %>%
  group_by(EVTYPE) %>%
  summarise(sum = sum(INJURIES, na.rm = TRUE)) %>%
  arrange(desc(sum))

# Total fatalities per event type, largest total first
type_fat <- storm %>%
  group_by(EVTYPE) %>%
  summarise(sum = sum(FATALITIES, na.rm = TRUE)) %>%
  arrange(desc(sum))

# One row per event type: injuries (sum.x) next to fatalities (sum.y), plus
# each total expressed as a share of the overall total.
people_harm <- type_ing %>%
  left_join(type_fat, by = "EVTYPE") %>%
  mutate(
    percentx = percent(sum.x / sum(sum.x)),
    percenty = percent(sum.y / sum(sum.y))
  )

# Put each percentage right after the count it belongs to
people_harm <- people_harm[c("EVTYPE", "sum.x", "percentx", "sum.y", "percenty")]

# Human-readable column headers for the results table
colnames(people_harm) <- c(
  "Event type",
  "Number of injuries",
  "Percentage of injuries",
  "Number of fatalities",
  "Percentage of fatalities"
)

Processing economic damage data: 1. Filtering for records that have a properly coded exponent and are of significant magnitude (exploratory data analysis indicated that there is a coding inconsistency in the database). 2. Computing property damage - multiplying PROPDMG by the exponent 3. Summarizing property damage costs by event type 4. Repeating steps 1 to 3 for crop damage data

# 2. ECONOMIC CONSEQUENCES -------------------------------------------------

# Cleaning and preparing property damage data

# Inspect how the magnitude code PROPDMGEXP is distributed
table(storm$PROPDMGEXP)
## 
##             -      ?      +      0      1      2      3      4      5 
## 465934      1      8      5    216     25     13      4      4     28 
##      6      7      8      B      h      H      K      m      M 
##      4      5      1     40      1      6 424665      7  11330
# Keep only records whose magnitude code is blank, "K", "M" or "B"
storm_prop <- filter(storm, PROPDMGEXP %in% c("", "K", "M", "B"))

# Translate the magnitude code into a numeric multiplier:
# blank -> 1, K -> 1e+03, M -> 1e+06, B -> 1e+09.
# A positional match() lookup keeps the column numeric throughout, instead of
# building a character vector and parsing it back with as.numeric().
storm_prop <- mutate(
  storm_prop,
  exponent = c(1, 1e+03, 1e+06, 1e+09)[
    match(as.character(PROPDMGEXP), c("", "K", "M", "B"))
  ]
)
# Final property damage in dollars
storm_prop <- mutate(storm_prop, prop_damage = PROPDMG * exponent)
# Total property damage per event type, largest first
type_prop <- summarise(group_by(storm_prop, EVTYPE), sum = sum(prop_damage, na.rm = TRUE))
type_prop <- arrange(type_prop, desc(sum))
# Ten event types with the highest total property damage
top_prop <- head(type_prop, 10)


# Cleaning and preparing crop damage data

# Inspect how the magnitude code CROPDMGEXP is distributed
table(storm$CROPDMGEXP)
## 
##             ?      0      2      B      k      K      m      M 
## 618413      7     19      1      9     21 281832      1   1994
# Keep only records whose magnitude code is blank, "K", "M" or "B"
storm_crop <- filter(storm, CROPDMGEXP %in% c("", "K", "M", "B"))

# Translate the magnitude code into a numeric multiplier:
# blank -> 1, K -> 1e+03, M -> 1e+06, B -> 1e+09.
# A positional match() lookup keeps the column numeric throughout, instead of
# building a character vector and parsing it back with as.numeric().
storm_crop <- mutate(
  storm_crop,
  exponent = c(1, 1e+03, 1e+06, 1e+09)[
    match(as.character(CROPDMGEXP), c("", "K", "M", "B"))
  ]
)
# Final crop damage in dollars
storm_crop <- mutate(storm_crop, crop_damage = CROPDMG * exponent)
# Total crop damage per event type, largest first
type_crop <- summarise(group_by(storm_crop, EVTYPE), sum = sum(crop_damage, na.rm = TRUE))
type_crop <- arrange(type_crop, desc(sum))
# Ten event types with the highest total crop damage
top_crop <- head(type_crop, 10)

RESULTS

Top 10 events with greatest harms for population health

# Draw the first ten rows of people_harm as a table image, showing only the
# event type and the two percentage columns (columns 1, 3 and 5).
# An empty plot serves as the canvas: every line and text element is blanked
# and the table grob is laid over the panel. ggplot() + geom_blank() replaces
# qplot(), which is deprecated in current ggplot2 releases.
ggplot(data.frame(x = 1:10, y = 1:10), aes(x = x, y = y)) +
  geom_blank() +
  theme_bw() +
  theme(line = element_blank(),
        text = element_blank()) +
  annotation_custom(grob = tableGrob(people_harm[1:10, c(1, 3, 5)]))

# Bar chart of the event types in top_prop; totals are rescaled from dollars
# to billions of dollars and the labels are drawn perpendicular to the axis.
with(
  top_prop,
  barplot(
    sum / 1e9,
    names.arg = EVTYPE,
    las = 3,
    cex.names = 0.5,
    main = "Top 10 events with greatest property damage",
    ylab = "Cost of damage (in billions of dollars)"
  )
)

# Same chart for the event types in top_crop
with(
  top_crop,
  barplot(
    sum / 1e9,
    names.arg = EVTYPE,
    las = 3,
    cex.names = 0.5,
    main = "Top 10 events with greatest crop damage",
    ylab = "Cost of damage (in billions of dollars)"
  )
)