Synopsis

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

The NOAA Data set in this Analysis is being explored to address the following questions:

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

  2. Across the United States, which types of events have the greatest economic consequences?

The original data set contains 37 variables and 902,297 observations. In the first part of the analysis, I identify the relevant data columns and subset them so that processing time is reduced. In the second part of the analysis, I use the subsetted data to create figures that help address the two questions.

Data Processing

The Storm data was downloaded from https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2. I loaded it into R using read.csv()

# Load the raw NOAA storm data.
# The archive is resolved relative to the current working directory instead of
# a machine-specific setwd() path, and it is fetched from the course mirror
# when it is not already present, so the report can be re-run on any machine.
library(data.table)
storm_file <- "repdata-data-StormData.csv.bz2"
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the compressed archive byte-exact on Windows.
  download.file(storm_url, destfile = storm_file, mode = "wb")
}
data <- read.csv(bzfile(storm_file))

Subsetting data to look at the types of events that occur with fatalities/injuries and property/crop damage

# Keep only the event type plus the casualty and damage columns that the rest
# of the analysis reads.
impCol <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
data <- data[, impCol]
head(data)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0

Subsetting data to look at just injuries and fatalities

# Casualty view: the event type together with its fatality and injury counts.
healthCol <- c("EVTYPE", "FATALITIES", "INJURIES")
healthData <- data[, healthCol]
head(healthData)
##    EVTYPE FATALITIES INJURIES
## 1 TORNADO          0       15
## 2 TORNADO          0        0
## 3 TORNADO          0        2
## 4 TORNADO          0        2
## 5 TORNADO          0        2
## 6 TORNADO          0        6

Subsetting and Processing data to look at the relationship between Event Type and Property and Crop Damage.

# Property-damage view: event type, damage figure and its magnitude code.
damPropCol <- c("EVTYPE", "PROPDMG", "PROPDMGEXP")
damagePropData <- data.frame(data[damPropCol])
# Keep only the events that reported a property-damage figure above 0.
damagePropData <- subset(damagePropData, PROPDMG > 0.00)
head(damagePropData)
##    EVTYPE PROPDMG PROPDMGEXP
## 1 TORNADO    25.0          K
## 2 TORNADO     2.5          K
## 3 TORNADO    25.0          K
## 4 TORNADO     2.5          K
## 5 TORNADO     2.5          K
## 6 TORNADO     2.5          K
# Transform the magnitude codes in PROPDMGEXP into numeric multipliers.
# Codes are matched case-insensitively ("k"/"K", "m"/"M", "b"/"B"); the old
# exact-match replacement left lower-case codes unrecognised, so they ended up
# with a multiplier of 0 and those damages were silently discarded.
propExpCode <- toupper(as.character(damagePropData$PROPDMGEXP))
propExpValue <- unname(c(K = 1e3, M = 1e6, B = 1e9)[propExpCode])
# Purely numeric codes keep their face value, exactly as before.
# NOTE(review): confirm against the NOAA documentation how digit codes (and
# any other letter codes) are meant to be interpreted.
propExpIsDigit <- grepl("^[0-9]+$", propExpCode)
propExpValue[propExpIsDigit] <- as.numeric(propExpCode[propExpIsDigit])
# Blank or unrecognised codes contribute no damage (multiplier 0), as before.
propExpValue[is.na(propExpValue)] <- 0
damagePropData$PROPDMGEXP <- propExpValue

# Collapse PROPDMG and PROPDMGEXP into one column, PROPDMTOTAL (in dollars).
damagePropData$PROPDMTOTAL <- damagePropData$PROPDMG * damagePropData$PROPDMGEXP
head(damagePropData)
##    EVTYPE PROPDMG PROPDMGEXP PROPDMTOTAL
## 1 TORNADO    25.0       1000       25000
## 2 TORNADO     2.5       1000        2500
## 3 TORNADO    25.0       1000       25000
## 4 TORNADO     2.5       1000        2500
## 5 TORNADO     2.5       1000        2500
## 6 TORNADO     2.5       1000        2500
# Keep only EVTYPE and PROPDMTOTAL.
# PROPDMTOTAL deliberately stays a double: as.integer() turns every value above
# .Machine$integer.max (about 2.1e9) into NA and integer sums overflow to NA,
# which removed the costliest events from the per-event totals.
# NOTE(review): the property-damage table and figure in the Results section
# were produced with the old coding and must be regenerated.
col <- c("EVTYPE", "PROPDMTOTAL")
damagePropDataFinal <- damagePropData[col]
head(damagePropDataFinal)
##    EVTYPE PROPDMTOTAL
## 1 TORNADO       25000
## 2 TORNADO        2500
## 3 TORNADO       25000
## 4 TORNADO        2500
## 5 TORNADO        2500
## 6 TORNADO        2500
# Crop-damage view: event type, damage figure and its magnitude code.
damCropCol <- c("EVTYPE", "CROPDMG", "CROPDMGEXP")
damageCropData <- data.frame(data[damCropCol])
# Keep only the events that reported a crop-damage figure above 0.
damageCropData <- subset(damageCropData, CROPDMG > 0.00)
head(damageCropData)
##                           EVTYPE CROPDMG CROPDMGEXP
## 187566 HURRICANE OPAL/HIGH WINDS      10          M
## 187571        THUNDERSTORM WINDS     500          K
## 187581            HURRICANE ERIN       1          M
## 187583            HURRICANE OPAL       4          M
## 187584            HURRICANE OPAL      10          m
## 187653        THUNDERSTORM WINDS      50          K
# Transform the magnitude codes in CROPDMGEXP into numeric multipliers.
# Codes are matched case-insensitively ("k"/"K", "m"/"M", "b"/"B"); the old
# exact-match replacement left lower-case codes such as the "m" on row 187584
# unrecognised, so that 10-million-dollar loss was counted as 0.
cropExpCode <- toupper(as.character(damageCropData$CROPDMGEXP))
cropExpValue <- unname(c(K = 1e3, M = 1e6, B = 1e9)[cropExpCode])
# Purely numeric codes keep their face value, exactly as before.
# NOTE(review): confirm against the NOAA documentation how digit codes (and
# any other letter codes) are meant to be interpreted.
cropExpIsDigit <- grepl("^[0-9]+$", cropExpCode)
cropExpValue[cropExpIsDigit] <- as.numeric(cropExpCode[cropExpIsDigit])
# Blank or unrecognised codes contribute no damage (multiplier 0), as before.
cropExpValue[is.na(cropExpValue)] <- 0
damageCropData$CROPDMGEXP <- cropExpValue
head(damageCropData)
##                           EVTYPE CROPDMG CROPDMGEXP
## 187566 HURRICANE OPAL/HIGH WINDS      10      1e+06
## 187571        THUNDERSTORM WINDS     500      1e+03
## 187581            HURRICANE ERIN       1      1e+06
## 187583            HURRICANE OPAL       4      1e+06
## 187584            HURRICANE OPAL      10      1e+06
## 187653        THUNDERSTORM WINDS      50      1e+03
# Collapse CROPDMG and CROPDMGEXP into one column, CROPDMTOTAL (in dollars).
damageCropData$CROPDMTOTAL <- damageCropData$CROPDMG * damageCropData$CROPDMGEXP
head(damageCropData)
##                           EVTYPE CROPDMG CROPDMGEXP CROPDMTOTAL
## 187566 HURRICANE OPAL/HIGH WINDS      10      1e+06       1e+07
## 187571        THUNDERSTORM WINDS     500      1e+03       5e+05
## 187581            HURRICANE ERIN       1      1e+06       1e+06
## 187583            HURRICANE OPAL       4      1e+06       4e+06
## 187584            HURRICANE OPAL      10      1e+06       1e+07
## 187653        THUNDERSTORM WINDS      50      1e+03       5e+04
# Keep only EVTYPE and CROPDMTOTAL.
# CROPDMTOTAL deliberately stays a double: as.integer() turns every value above
# .Machine$integer.max (about 2.1e9) into NA and integer sums overflow to NA,
# which removed the costliest events from the per-event totals.
# NOTE(review): the crop-damage table and figure in the Results section were
# produced with the old coding and must be regenerated.
col <- c("EVTYPE", "CROPDMTOTAL")
damageCropDataFinal <- damageCropData[col]
head(damageCropDataFinal)
##                           EVTYPE CROPDMTOTAL
## 187566 HURRICANE OPAL/HIGH WINDS       1e+07
## 187571        THUNDERSTORM WINDS       5e+05
## 187581            HURRICANE ERIN       1e+06
## 187583            HURRICANE OPAL       4e+06
## 187584            HURRICANE OPAL       1e+07
## 187653        THUNDERSTORM WINDS       5e+04

Results

Exploration of Injuries and Fatalities on Population from Weather Events

library(dplyr)
library(reshape)
library(ggplot2)

healthData <- tbl_df(healthData)
# Total fatalities per event type, largest first.
by_Event <- healthData %>% group_by(EVTYPE)
finalFatalties <- by_Event %>%
  summarise(TOTFATS = sum(FATALITIES)) %>%
  arrange(desc(TOTFATS))
head(finalFatalties)
## Source: local data frame [6 x 2]
## 
##           EVTYPE TOTFATS
## 1        TORNADO    5633
## 2 EXCESSIVE HEAT    1903
## 3    FLASH FLOOD     978
## 4           HEAT     937
## 5      LIGHTNING     816
## 6      TSTM WIND     504
# Total injuries per event type, largest first.
by_Event <- healthData %>% group_by(EVTYPE)
finalInjuries <- by_Event %>%
  summarise(TOTINJS = sum(INJURIES)) %>%
  arrange(desc(TOTINJS))
head(finalInjuries)
## Source: local data frame [6 x 2]
## 
##           EVTYPE TOTINJS
## 1        TORNADO   91346
## 2      TSTM WIND    6957
## 3          FLOOD    6789
## 4 EXCESSIVE HEAT    6525
## 5      LIGHTNING    5230
## 6           HEAT    2100
# Join the per-event fatality and injury totals on the event type.
# merge() has no `id.vars` argument (that name belongs to reshape's melt()),
# so it was silently ignored; `by` names the join key explicitly.
final_Total <- merge(finalFatalties, finalInjuries, by = "EVTYPE")
final_Total <- arrange(final_Total, desc(TOTFATS))
head(final_Total)
##           EVTYPE TOTFATS TOTINJS
## 1        TORNADO    5633   91346
## 2 EXCESSIVE HEAT    1903    6525
## 3    FLASH FLOOD     978    1777
## 4           HEAT     937    2100
## 5      LIGHTNING     816    5230
## 6      TSTM WIND     504    6957
# Fatalities and injuries are combined into one casualty count per event type
# so that a single bar chart can rank the events.
final_Total$TOTAL <- final_Total$TOTFATS + final_Total$TOTINJS

# Rank by the combined count BEFORE keeping the top 12. Taking the first 12
# rows while the table is ordered by fatalities alone can leave out events
# that have more total casualties than some of the events shown.
final_Total <- arrange(final_Total, desc(TOTAL))
# head() returns at most 12 rows and never pads with NA rows.
final_Total <- head(final_Total, 12)

plot_1 <- ggplot(final_Total, aes(x = reorder(EVTYPE, -TOTAL), y = TOTAL)) +
  geom_bar(color = "RED", stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Weather Event",
    y = "Fatalities and Injuries",
    title = "Top 12 Most Deadly Weather Types"
  )
plot_1

  • The figure shows that Tornadoes are the most deadly weather event. Excessive Heat and Thunderstorms follow a distant 2nd and 3rd.

Exploration of Economic Property Damage from Weather Events

library(dplyr)
library(reshape)
library(ggplot2)

damagePropDataFinal <- tbl_df(damagePropDataFinal)
# Sum the property damage within each event type and rank the totals,
# largest first.
by_Event <- damagePropDataFinal %>% group_by(EVTYPE)
finalPropDamage <- by_Event %>%
  summarise(TOTDM = sum(PROPDMTOTAL)) %>%
  arrange(desc(TOTDM))
head(finalPropDamage)
## Source: local data frame [6 x 2]
## 
##                       EVTYPE      TOTDM
## 1         THUNDERSTORM WINDS 1733453612
## 2 TORNADOES, TSTM WIND, HAIL 1600000000
## 3        SEVERE THUNDERSTORM 1205360000
## 4                    DROUGHT 1046106000
## 5                 HEAVY SNOW  932589148
## 6                  LIGHTNING  928659366
# Keep the 12 highest-ranked rows and chart them from largest to smallest.
finalPropDamage <- finalPropDamage[seq_len(12), ]

plot_2 <- ggplot(finalPropDamage, aes(x = reorder(EVTYPE, -TOTDM), y = TOTDM))
plot_2 <- plot_2 +
  geom_bar(stat = "identity", color = "BLUE") +
  labs(
    title = "Top 12 Most Destructive to Property Weather Events",
    x = "Weather Event",
    y = "Property Damages"
  ) +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot_2

  • The figure shows that Thunderstorm Winds are the most destructive to property. Tornadoes mixed with thunderstorm wind and hail are the 2nd most economically destructive weather event.

Exploration of Economic Crop Damage from Weather Events

library(dplyr)
library(reshape)
library(ggplot2)

damageCropDataFinal <- tbl_df(damageCropDataFinal)
# Sum the crop damage within each event type and rank the totals,
# largest first.
by_Event <- damageCropDataFinal %>% group_by(EVTYPE)
finalCropDamage <- by_Event %>%
  summarise(TOTDM = sum(CROPDMTOTAL)) %>%
  arrange(desc(TOTDM))
head(finalCropDamage)
## Source: local data frame [6 x 2]
## 
##           EVTYPE      TOTDM
## 1    FLASH FLOOD 1421317100
## 2   EXTREME COLD 1292972999
## 3   FROST/FREEZE 1094086000
## 4     HEAVY RAIN  733399800
## 5 TROPICAL STORM  678346000
## 6      HIGH WIND  638571300
# Keep the 12 highest-ranked rows and chart them from largest to smallest.
finalCropDamage <- finalCropDamage[seq_len(12), ]

plot_3 <- ggplot(finalCropDamage, aes(x = reorder(EVTYPE, -TOTDM), y = TOTDM))
plot_3 <- plot_3 +
  geom_bar(stat = "identity", color = "GREEN") +
  labs(
    title = "Top 12 Most Destructive to Crop Weather Events",
    x = "Weather Event",
    y = "Crop Damages"
  ) +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot_3

  • The figure shows that flash floods are the most destructive to crops. Extreme cold and frost/freeze follow in 2nd and 3rd.

Conclusion

The results show the following: