This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The NOAA Data set in this Analysis is being explored to address the following questions:
Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?
The original data set contains 37 varaibles with 902,297 observations. The first part of the anaylsis I will identify relevant data columns and subset so that the processing time is sped up. The second part of the analysis will be using my subsetted data to create figures to help aid in addressing the two questions.
The Storm data was downloaded from https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2. I loaded it into R using read.csv()
setwd("C:/Users/Owner/Desktop/ReproResearch")
library(data.table)
data <- read.csv(bzfile("repdata-data-StormData.csv.bz2"))
impCol <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
data <- data[impCol]
head(data)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
healthCol <- c("EVTYPE", "FATALITIES", "INJURIES")
healthData <- data[healthCol]
head(healthData)
## EVTYPE FATALITIES INJURIES
## 1 TORNADO 0 15
## 2 TORNADO 0 0
## 3 TORNADO 0 2
## 4 TORNADO 0 2
## 5 TORNADO 0 2
## 6 TORNADO 0 6
damPropCol <- c("EVTYPE", "PROPDMG", "PROPDMGEXP")
damagePropData <- data[damPropCol]
damagePropData <- data.frame(damagePropData)
#Subsetting Events with Property Damages above 0.
damagePropData <- damagePropData[ which(damagePropData$PROPDMG > 0.00), ]
head(damagePropData)
## EVTYPE PROPDMG PROPDMGEXP
## 1 TORNADO 25.0 K
## 2 TORNADO 2.5 K
## 3 TORNADO 25.0 K
## 4 TORNADO 2.5 K
## 5 TORNADO 2.5 K
## 6 TORNADO 2.5 K
#Transforming letters in PROPDMGEXP column to numermic values
damagePropData$PROPDMGEXP <- as.character(damagePropData$PROPDMGEXP)
damagePropData$PROPDMGEXP[damagePropData$PROPDMGEXP == "K"] <- 1000
damagePropData$PROPDMGEXP[damagePropData$PROPDMGEXP == "M"] <- 1000000
damagePropData$PROPDMGEXP[damagePropData$PROPDMGEXP == "B"] <- 1000000000
damagePropData$PROPDMGEXP <- as.numeric(damagePropData$PROPDMGEXP)
## Warning: NAs introduced by coercion
damagePropData$PROPDMGEXP[is.na(damagePropData$PROPDMGEXP)] = 0
#Collapaing PROPDMG and PRODMGEXP into 1 column
damagePropData$PROPDMTOTAL <- damagePropData$PROPDMG * damagePropData$PROPDMGEXP
head(damagePropData)
## EVTYPE PROPDMG PROPDMGEXP PROPDMTOTAL
## 1 TORNADO 25.0 1000 25000
## 2 TORNADO 2.5 1000 2500
## 3 TORNADO 25.0 1000 25000
## 4 TORNADO 2.5 1000 2500
## 5 TORNADO 2.5 1000 2500
## 6 TORNADO 2.5 1000 2500
#Remove CROPDMG and CROPDMGEXP and make PROPDMTOTAL a readable column
col<- c("EVTYPE", "PROPDMTOTAL")
damagePropDataFinal <- damagePropData[col]
damagePropDataFinal$PROPDMTOTAL <- as.integer(damagePropDataFinal$PROPDMTOTAL)
## Warning: NAs introduced by coercion
head(damagePropDataFinal)
## EVTYPE PROPDMTOTAL
## 1 TORNADO 25000
## 2 TORNADO 2500
## 3 TORNADO 25000
## 4 TORNADO 2500
## 5 TORNADO 2500
## 6 TORNADO 2500
#Subsetting Events with Crop Damages above 0
damCropCol <- c("EVTYPE", "CROPDMG", "CROPDMGEXP")
damageCropData <- data[damCropCol]
damageCropData <- data.frame(damageCropData)
#Subsetting Events with Crop Damages above 0.
damageCropData <- damageCropData[ which(damageCropData$CROPDMG > 0.00), ]
head(damageCropData)
## EVTYPE CROPDMG CROPDMGEXP
## 187566 HURRICANE OPAL/HIGH WINDS 10 M
## 187571 THUNDERSTORM WINDS 500 K
## 187581 HURRICANE ERIN 1 M
## 187583 HURRICANE OPAL 4 M
## 187584 HURRICANE OPAL 10 m
## 187653 THUNDERSTORM WINDS 50 K
#Transforming letters in CROPDMGEXP column to numeric values
damageCropData$CROPDMGEXP <- as.character(damageCropData$CROPDMGEXP)
damageCropData$CROPDMGEXP[damageCropData$CROPDMGEXP == "K"] <- 1000
damageCropData$CROPDMGEXP[damageCropData$CROPDMGEXP == "M"] <- 1000000
damageCropData$CROPDMGEXP[damageCropData$CROPDMGEXP == "B"] <- 1000000000
damageCropData$CROPDMGEXP <- as.numeric(damageCropData$CROPDMGEXP)
## Warning: NAs introduced by coercion
damageCropData$CROPDMGEXP[is.na(damageCropData$CROPDMGEXP)] = 0
head(damageCropData)
## EVTYPE CROPDMG CROPDMGEXP
## 187566 HURRICANE OPAL/HIGH WINDS 10 1e+06
## 187571 THUNDERSTORM WINDS 500 1e+03
## 187581 HURRICANE ERIN 1 1e+06
## 187583 HURRICANE OPAL 4 1e+06
## 187584 HURRICANE OPAL 10 0e+00
## 187653 THUNDERSTORM WINDS 50 1e+03
#Collapaing CROPDMG and CRODMGEXP into 1 columns CROPDMGTOTAL
damageCropData$CROPDMTOTAL <- damageCropData$CROPDMG * damageCropData$CROPDMGEXP
head(damageCropData)
## EVTYPE CROPDMG CROPDMGEXP CROPDMTOTAL
## 187566 HURRICANE OPAL/HIGH WINDS 10 1e+06 1e+07
## 187571 THUNDERSTORM WINDS 500 1e+03 5e+05
## 187581 HURRICANE ERIN 1 1e+06 1e+06
## 187583 HURRICANE OPAL 4 1e+06 4e+06
## 187584 HURRICANE OPAL 10 0e+00 0e+00
## 187653 THUNDERSTORM WINDS 50 1e+03 5e+04
#Remove CROPDMG and CROPDMGEXP and readable column
col<- c("EVTYPE", "CROPDMTOTAL")
damageCropDataFinal <- damageCropData[col]
damageCropDataFinal$CROPDMTOTAL <- as.integer(damageCropDataFinal$CROPDMTOTAL)
## Warning: NAs introduced by coercion
head(damageCropDataFinal)
## EVTYPE CROPDMTOTAL
## 187566 HURRICANE OPAL/HIGH WINDS 10000000
## 187571 THUNDERSTORM WINDS 500000
## 187581 HURRICANE ERIN 1000000
## 187583 HURRICANE OPAL 4000000
## 187584 HURRICANE OPAL 0
## 187653 THUNDERSTORM WINDS 50000
library(dplyr)
library(reshape)
library(ggplot2)
healthData <- tbl_df(healthData)
by_Event <- group_by(healthData, EVTYPE)
finalFatalties <- summarize(by_Event, TOTFATS = sum(FATALITIES))
finalFatalties <- arrange(finalFatalties, desc(TOTFATS))
head(finalFatalties)
## Source: local data frame [6 x 2]
##
## EVTYPE TOTFATS
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
by_Event <- group_by(healthData, EVTYPE)
finalInjuries <- summarize(by_Event, TOTINJS = sum(INJURIES))
finalInjuries <- arrange(finalInjuries, desc(TOTINJS))
head(finalInjuries)
## Source: local data frame [6 x 2]
##
## EVTYPE TOTINJS
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
## 6 HEAT 2100
final_Total <- merge(finalFatalties, finalInjuries, id.vars="EVTYPE")
final_Total <- arrange(final_Total, desc(TOTFATS))
head(final_Total)
## EVTYPE TOTFATS TOTINJS
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
#I was having a lot of problems making graphs with injuries and fatalties seperated so I combined fatatlties and injuries.
final_Total$TOTAL <- final_Total$TOTFATS + final_Total$TOTINJS
final_Total <- final_Total[1:12,]
final_Total <- arrange(final_Total, desc(TOTAL))
plot_1 <- ggplot(final_Total, aes(x = reorder(EVTYPE, -TOTAL), y = TOTAL)) +
geom_bar(color="RED", stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(x = "Weather Event", y = "Fatalities and Injuries", title = "Top 12 Most Deadly Weather Types")
plot_1
library(dplyr)
library(reshape)
library(ggplot2)
damagePropDataFinal <- tbl_df(damagePropDataFinal)
#Grouping Events togther and taking sum of damages from each EVENT
by_Event <- group_by(damagePropDataFinal, EVTYPE)
finalPropDamage <- summarize(by_Event, TOTDM = sum(PROPDMTOTAL))
finalPropDamage <- arrange(finalPropDamage, desc(TOTDM))
head(finalPropDamage)
## Source: local data frame [6 x 2]
##
## EVTYPE TOTDM
## 1 THUNDERSTORM WINDS 1733453612
## 2 TORNADOES, TSTM WIND, HAIL 1600000000
## 3 SEVERE THUNDERSTORM 1205360000
## 4 DROUGHT 1046106000
## 5 HEAVY SNOW 932589148
## 6 LIGHTNING 928659366
finalPropDamage <- finalPropDamage[1:12,]
plot_2 <- ggplot(finalPropDamage, aes(x = reorder(EVTYPE, -TOTDM), y = TOTDM)) +
geom_bar(color="BLUE", stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(x = "Weather Event", y = "Property Damages", title = "Top 12 Most Destructive to Property Weather Events")
plot_2
library(dplyr)
library(reshape)
library(ggplot2)
damageCropDataFinal <- tbl_df(damageCropDataFinal)
#Grouping Events togther and taking sum of damages from each EVENT
by_Event <- group_by(damageCropDataFinal, EVTYPE)
finalCropDamage <- summarize(by_Event, TOTDM = sum(CROPDMTOTAL))
finalCropDamage <- arrange(finalCropDamage, desc(TOTDM))
head(finalCropDamage)
## Source: local data frame [6 x 2]
##
## EVTYPE TOTDM
## 1 FLASH FLOOD 1421317100
## 2 EXTREME COLD 1292972999
## 3 FROST/FREEZE 1094086000
## 4 HEAVY RAIN 733399800
## 5 TROPICAL STORM 678346000
## 6 HIGH WIND 638571300
finalCropDamage <- finalCropDamage[1:12,]
plot_3 <- ggplot(finalCropDamage, aes(x = reorder(EVTYPE, -TOTDM), y = TOTDM)) +
geom_bar(color="GREEN", stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(x = "Weather Event", y = "Crop Damages", title = "Top 12 Most Destructive to Crop Weather Events")
plot_3
The data results shows the following:
Tornadoes are the most deadly weather event in the United States.
Thunderstorm winds are the most destructive to property
Flashloods are the most destructive to crops in the United States.