Synopsis

This project uses the NOAA Storm Database (1950 - 2011) to analyze severe weather events and answer the following questions:
1. Across the United States, which types of events are most harmful with respect to population health?
2. Across the United States, which types of events have the greatest economic consequences?

Data Processing

  1. The data were downloaded from the course website: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2 and stored in the working directory. Import the data into R using the following code.
# read.csv() opens the bzip2-compressed file directly, so it does not need to
# be decompressed by hand first.
storm_data <- read.csv(file = "repdata_data_StormData.csv.bz2")
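  1. The data elements relevant to this project include: EVTYPE (event type), FATALITIES, INJURIES, PROPDMG and PROPDMGEXP (property damage estimate and its magnitude code), and CROPDMG and CROPDMGEXP (crop damage estimate and its magnitude code).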
  1. Subset the data for fatalities.
# Events with at least one fatality; which() drops rows whose comparison is NA.
fatality_data_subset <- storm_data[
  which(storm_data$FATALITIES > 0),
  c("EVTYPE", "FATALITIES"),
  drop = FALSE
]
  1. Subset the data for injuries.
# Events with at least one injury; which() drops rows whose comparison is NA.
injury_data_subset <- storm_data[
  which(storm_data$INJURIES > 0),
  c("EVTYPE", "INJURIES"),
  drop = FALSE
]
  1. Subset the data for property damages.
# Events with a positive property damage estimate, plus its magnitude code.
pdamages_data_subset <- storm_data[
  which(storm_data$PROPDMG > 0),
  c("EVTYPE", "PROPDMG", "PROPDMGEXP"),
  drop = FALSE
]
  1. Subset the data for crop damages.
# Events with a positive crop damage estimate, plus its magnitude code.
cdamages_data_subset <- storm_data[
  which(storm_data$CROPDMG > 0),
  c("EVTYPE", "CROPDMG", "CROPDMGEXP"),
  drop = FALSE
]
  1. Identify all magnitude codes that appear in the property and crop damage columns of the primary file.
# Distinct magnitude codes used for property damage
unique(storm_data[["PROPDMGEXP"]])
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Distinct magnitude codes used for crop damage
unique(storm_data[["CROPDMGEXP"]])
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M
  1. Define a data transformation function to associate each magnitude value with a multiplier. The multiplier will be applied to property and crop damage estimates to calculate the damage value prior to aggregation.
#' Convert damage estimates and their magnitude codes into damage amounts.
#'
#' Each magnitude code selects a multiplier that is applied to the estimate:
#' "-", "?" and "+" give 0; "0" and "" give the estimate unchanged; the digits
#' "1" to "8" give 10^digit; "H"/"h" give 100; "K"/"k" give 1,000; "M"/"m"
#' give 1,000,000; "B"/"b" give 1,000,000,000.
#'
#' The function is vectorised, so it can be called on whole columns as well as
#' on one value at a time (for example through mapply()).
#'
#' @param damage_amt Numeric vector of damage estimates.
#' @param mag Vector of magnitude codes (character or factor); recycled
#'   against `damage_amt` by the usual R rules.
#' @return Numeric vector of damage amounts.
#' @details Stops with "Invalid Data" if any magnitude code, including NA, is
#'   not one of the codes listed above.
Damage_calc_fun <- function(damage_amt, mag) {
  # Parallel lookup vectors: magnitude_codes[i] maps to code_multipliers[i].
  # Two vectors are used rather than a named vector because "" is not a
  # valid element name.
  magnitude_codes <- c(
    "-", "?", "+",
    "0", "",
    "1",
    "2", "H", "h",
    "3", "K", "k",
    "4",
    "5",
    "6", "M", "m",
    "7",
    "8",
    "B", "b"
  )
  code_multipliers <- c(
    0, 0, 0,
    1, 1,
    1e1,
    1e2, 1e2, 1e2,
    1e3, 1e3, 1e3,
    1e4,
    1e5,
    1e6, 1e6, 1e6,
    1e7,
    1e8,
    1e9, 1e9
  )

  # as.character() makes factor input match on its labels, not integer codes.
  code_index <- match(as.character(mag), magnitude_codes)
  if (anyNA(code_index)) {
    stop("Invalid Data")
  }

  multiplier <- code_multipliers[code_index]
  damage <- damage_amt * multiplier
  # Codes with a zero multiplier always yield exactly 0, even when the
  # estimate itself is NA or infinite.
  damage[multiplier == 0] <- 0
  damage
}
  1. Calculate the damage amounts by applying the Damage_calc_fun function created above.
# Property damage: one Damage_calc_fun call per (PROPDMG, PROPDMGEXP) pair
pdamages_data_subset[["Prop_Damage"]] <- mapply(
  FUN = Damage_calc_fun,
  pdamages_data_subset$PROPDMG,
  pdamages_data_subset$PROPDMGEXP
)
# Crop damage: one Damage_calc_fun call per (CROPDMG, CROPDMGEXP) pair
cdamages_data_subset[["Crop_Damage"]] <- mapply(
  FUN = Damage_calc_fun,
  cdamages_data_subset$CROPDMG,
  cdamages_data_subset$CROPDMGEXP
)
  1. Aggregate the data, sort the results, select, and display the top 10.
# Fatalities: total per event type, then keep the ten largest totals
fatality_data_agg <- aggregate(
  FATALITIES ~ EVTYPE,
  data = fatality_data_subset,
  FUN = sum,
  na.rm = TRUE
)
fatality_data_agg <- fatality_data_agg[
  order(fatality_data_agg$FATALITIES, decreasing = TRUE)[1:10],
]
fatality_data_agg
##             EVTYPE FATALITIES
## 141        TORNADO       5633
## 26  EXCESSIVE HEAT       1903
## 35     FLASH FLOOD        978
## 57            HEAT        937
## 97       LIGHTNING        816
## 145      TSTM WIND        504
## 40           FLOOD        470
## 116    RIP CURRENT        368
## 75       HIGH WIND        248
## 2        AVALANCHE        224
# Injuries: total per event type, then keep the ten largest totals
injury_data_agg <- aggregate(
  INJURIES ~ EVTYPE,
  data = injury_data_subset,
  FUN = sum,
  na.rm = TRUE
)
injury_data_agg <- injury_data_agg[
  order(injury_data_agg$INJURIES, decreasing = TRUE)[1:10],
]
injury_data_agg
##                EVTYPE INJURIES
## 129           TORNADO    91346
## 135         TSTM WIND     6957
## 30              FLOOD     6789
## 20     EXCESSIVE HEAT     6525
## 85          LIGHTNING     5230
## 47               HEAT     2100
## 79          ICE STORM     1975
## 28        FLASH FLOOD     1777
## 121 THUNDERSTORM WIND     1488
## 45               HAIL     1361
# Property damage: total per event type, then keep the ten largest totals
pdamages_data_agg <- aggregate(
  Prop_Damage ~ EVTYPE,
  data = pdamages_data_subset,
  FUN = sum,
  na.rm = TRUE
)
pdamages_data_agg <- pdamages_data_agg[
  order(pdamages_data_agg$Prop_Damage, decreasing = TRUE)[1:10],
]
pdamages_data_agg
##                EVTYPE  Prop_Damage
## 64              FLOOD 144657709807
## 182 HURRICANE/TYPHOON  69305840000
## 334           TORNADO  56947380617
## 282       STORM SURGE  43323536000
## 51        FLASH FLOOD  16822673979
## 106              HAIL  15735267513
## 174         HURRICANE  11868319010
## 342    TROPICAL STORM   7703890550
## 399      WINTER STORM   6688497251
## 159         HIGH WIND   5270046260
# Crop damage: total per event type, then keep the ten largest totals
cdamages_data_agg <- aggregate(
  Crop_Damage ~ EVTYPE,
  data = cdamages_data_subset,
  FUN = sum,
  na.rm = TRUE
)
cdamages_data_agg <- cdamages_data_agg[
  order(cdamages_data_agg$Crop_Damage, decreasing = TRUE)[1:10],
]
cdamages_data_agg
##               EVTYPE Crop_Damage
## 10           DROUGHT 13972566000
## 27             FLOOD  5661968450
## 78       RIVER FLOOD  5029459000
## 72         ICE STORM  5022113500
## 42              HAIL  3025954473
## 64         HURRICANE  2741910000
## 69 HURRICANE/TYPHOON  2607872800
## 23       FLASH FLOOD  1421317100
## 19      EXTREME COLD  1292973000
## 37      FROST/FREEZE  1094086000

Results

Figure 1

# Two panels side by side. The large bottom margin leaves room for the
# vertical (las = 3) event-type labels.
par(mfrow = c(1,2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)

# Left panel: fatalities
with(
  fatality_data_agg,
  barplot(
    FATALITIES,
    names.arg = EVTYPE,
    las = 3,
    main = "Top 10 Fatalities By Event Type",
    ylab = "Frequency"
  )
)

# Right panel: injuries
with(
  injury_data_agg,
  barplot(
    INJURIES,
    names.arg = EVTYPE,
    las = 3,
    main = "Top 10 Injuries By Event Type",
    ylab = "Frequency"
  )
)

Figure 2

# Two panels side by side. The large bottom margin leaves room for the
# vertical (las = 3) event-type labels.
par(mfrow = c(1,2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)

# The aggregated amounts are in dollars; divide by 1e9 so the plotted values
# match the "Cost ($billions)" axis label.

# Property Damage
barplot(pdamages_data_agg$Prop_Damage / 1e9,
        names.arg = pdamages_data_agg$EVTYPE,
        las = 3,
        main = "Top 10 Causes of Property Damage",
        ylab = "Cost ($billions)")
# Crop Damage
barplot(cdamages_data_agg$Crop_Damage / 1e9,
        names.arg = cdamages_data_agg$EVTYPE,
        las = 3,
        main = "Top 10 Causes of Crop Damage",
        ylab = "Cost ($billions)")

Summary

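  1. Which types of events are most harmful with respect to population health? Tornadoes, by a wide margin: in the tables above they account for the most fatalities (5,633) and the most injuries (91,346). Excessive heat ranks second for fatalities (1,903), and thunderstorm wind (TSTM WIND) ranks second for injuries (6,957).
  1. Which types of events have the greatest economic consequences? Floods cause the most property damage (about $144.7 billion), followed by hurricanes/typhoons (about $69.3 billion). Drought causes the most crop damage (about $14.0 billion), followed by floods (about $5.7 billion).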