## Synopsis
This analysis explores critical weather events that have significant public health and economic impacts. The data come from the NOAA database, which contains information on major storms and severe weather episodes in the United States (1950-2011). It records when and where they occurred, as well as estimates of any fatalities, injuries, and property damage.
The data can be downloaded from the course website: Storm Data
Documentation of the database is available here:
- National Weather Service Storm Data Documentation
- National Climatic Data Center Storm Events FAQ
# New project created in RStudio called: "RFinalProject"
dir()
## [1] "NOAA_Storm_Data.csv.bz2" "RFinalProject.pdf"
## [3] "RFinalProject.Rmd" "RFinalProject.Rproj"
library(knitr)
## Warning: package 'knitr' was built under R version 4.0.3
library(plyr)
library(ggplot2)
library(lattice)
## Warning: package 'lattice' was built under R version 4.0.3
library(data.table)
## Warning: package 'data.table' was built under R version 4.0.3
library(grid)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.0.3
# NOAA weather impact data (bzip2-compressed CSV)
fileUrl <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Save the archive into the working directory, which is where it is read
# from afterwards. Writing it to "~/" only works when the project happens
# to live in the home directory.
destFile <- "NOAA_Storm_Data.csv.bz2"
# Downloading the data (skipped when the archive is already present, so
# re-running the report does not fetch the file again)
if (!file.exists(destFile)) {
  download.file(fileUrl, destfile = destFile, method = "curl")
}
# Timestamp of this run
date()
## [1] "Mon Dec 28 15:08:14 2020"
# Load the compressed CSV through a bzip2 connection into `df`, and
# report how long the read takes.
system.time(
  df <- read.csv(
    file = bzfile("NOAA_Storm_Data.csv.bz2"),
    stringsAsFactors = FALSE,
    strip.white = TRUE,
    header = TRUE
  )
)
## user system elapsed
## 49.81 0.65 50.54
Event Type (EVTYPE), Begin Date of Event (BGN_DATE), Event Human Fatalities (FATALITIES), Event Human Injuries (INJURIES), Event Estimated Property Damages (PROPDMG), Event Property Damage Decimal Precision (PROPDMGEXP), Event Estimated Crop Damages (CROPDMG), Event Crop Damage Decimal Precision (CROPDMGEXP)
# Keep only the eight columns used in the analysis
df <- df[c(
  "EVTYPE", "BGN_DATE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
# Parse the raw begin-date strings (month/day/year hour:minute:second)
# into date-times
df[["BGN_DATE"]] <- as.POSIXct(df[["BGN_DATE"]], format = "%m/%d/%Y %H:%M:%S")
# Inspect the structure of the reduced data frame
str(df)
## 'data.frame': 902297 obs. of 8 variables:
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_DATE : POSIXct, format: "1950-04-18" "1950-04-18" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
head(unique(df$EVTYPE))
## [1] "TORNADO" "TSTM WIND" "HAIL"
## [4] "FREEZING RAIN" "SNOW" "ICE STORM/FLASH FLOOD"
Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health ?
# Fatalities: sum per event type, then keep the ten largest totals
# (ties broken by event name)
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = df, FUN = sum)
fatalities <- arrange(fatalities, desc(FATALITIES), EVTYPE)[1:10, ]
# Turn the event type into a factor whose levels follow the ranked order
fatalities$EVTYPE <- factor(fatalities$EVTYPE, levels = fatalities$EVTYPE)

# Injuries: same steps as for fatalities
injuries <- aggregate(INJURIES ~ EVTYPE, data = df, FUN = sum)
injuries <- arrange(injuries, desc(INJURIES), EVTYPE)[1:10, ]
injuries$EVTYPE <- factor(injuries$EVTYPE, levels = injuries$EVTYPE)

# Show the fatalities table
fatalities
## EVTYPE FATALITIES
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
injuries
## EVTYPE INJURIES
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
## 6 HEAT 2100
## 7 ICE STORM 1975
## 8 FLASH FLOOD 1777
## 9 THUNDERSTORM WIND 1488
## 10 HAIL 1361
# Bar chart of fatalities for the top 10 event types; x labels are
# rotated so the event names stay readable
fatalitiesbyweather <- ggplot(fatalities, aes(x = EVTYPE, y = FATALITIES)) +
  geom_col(fill = "blue") +
  labs(x = "Event Type", y = "Fatalities") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Bar chart of injuries for the top 10 event types
injuriesbyweather <- ggplot(injuries, aes(x = EVTYPE, y = INJURIES)) +
  geom_col(fill = "blue") +
  labs(x = "Event Type", y = "Injuries") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## QUESTION 2
Across the United States, which types of events have the greatest economic consequences ?
# Normalize the damage amounts to plain dollar figures.
#
# PROPDMGEXP / CROPDMGEXP hold a magnitude code for the matching damage
# column. A single lookup table is used for both columns:
#   - letters are matched case-insensitively
#     (H = hundred, K = thousand, M = million, B = billion), so "h" and "H",
#     "k" and "K", "m" and "M" are always treated alike;
#   - a digit d stands for a multiplier of 10^d;
#   - anything else ("", "+", "-", "?", NA or an unexpected code) falls back
#     to a multiplier of 1, so the raw amount is kept rather than turned
#     into NA and silently dropped from the later sums.
damage_multiplier <- c(
  H = 1e2, K = 1e3, M = 1e6, B = 1e9,
  "0" = 1, "1" = 10, "2" = 1e2, "3" = 1e3, "4" = 1e4,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8
)

# Look up one multiplier per record; codes missing from the table give NA,
# which is then replaced by the fallback of 1
tmpPROPDMG <- unname(damage_multiplier[toupper(df$PROPDMGEXP)])
tmpPROPDMG[is.na(tmpPROPDMG)] <- 1
tmpCROPDMG <- unname(damage_multiplier[toupper(df$CROPDMGEXP)])
tmpCROPDMG[is.na(tmpCROPDMG)] <- 1

# Make numeric property and crop damage
df$TOTAL_PROPDMG <- tmpPROPDMG * df$PROPDMG
df$TOTAL_CROPDMG <- tmpCROPDMG * df$CROPDMG
# Show column names now in the data frame (df)
colnames(df)
## [1] "EVTYPE" "BGN_DATE" "FATALITIES" "INJURIES"
## [5] "PROPDMG" "PROPDMGEXP" "CROPDMG" "CROPDMGEXP"
## [9] "TOTAL_PROPDMG" "TOTAL_CROPDMG"
# Combined loss per record: property damage plus crop damage
df$TOTALDMG <- df$TOTAL_PROPDMG + df$TOTAL_CROPDMG

# Property damage: sum per event type, keep the ten largest totals (ties
# broken by event name), then fix the factor levels in ranked order
propdamage <- aggregate(TOTAL_PROPDMG ~ EVTYPE, data = df, FUN = sum)
propdamage <- arrange(propdamage, desc(TOTAL_PROPDMG), EVTYPE)[1:10, ]
propdamage$EVTYPE <- factor(propdamage$EVTYPE, levels = propdamage$EVTYPE)

# Crop damage: same steps
cropdamage <- aggregate(TOTAL_CROPDMG ~ EVTYPE, data = df, FUN = sum)
cropdamage <- arrange(cropdamage, desc(TOTAL_CROPDMG), EVTYPE)[1:10, ]
cropdamage$EVTYPE <- factor(cropdamage$EVTYPE, levels = cropdamage$EVTYPE)

# Total (property + crop) damage: same steps
totaldamage <- aggregate(TOTALDMG ~ EVTYPE, data = df, FUN = sum)
totaldamage <- arrange(totaldamage, desc(TOTALDMG), EVTYPE)[1:10, ]
totaldamage$EVTYPE <- factor(totaldamage$EVTYPE, levels = totaldamage$EVTYPE)

# Show the property damage table
propdamage
## EVTYPE TOTAL_PROPDMG
## 1 FLOOD 144657709807
## 2 HURRICANE/TYPHOON 69305840000
## 3 TORNADO 56947380677
## 4 STORM SURGE 43323536000
## 5 FLASH FLOOD 16822673979
## 6 HAIL 15735267513
## 7 HURRICANE 11868319010
## 8 TROPICAL STORM 7703890550
## 9 WINTER STORM 6688497251
## 10 HIGH WIND 5270046295
# Bar chart of property damage for the top 10 event types
propplotdamage <- ggplot(propdamage, aes(x = EVTYPE, y = TOTAL_PROPDMG)) +
  geom_col(fill = "green") +
  labs(x = "Event Type", y = "Property Damages ($)") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Show the crop damage table
cropdamage
## EVTYPE TOTAL_CROPDMG
## 1 DROUGHT 13972566000
## 2 FLOOD 5661968450
## 3 RIVER FLOOD 5029459000
## 4 ICE STORM 5022113500
## 5 HAIL 3025954473
## 6 HURRICANE 2741910000
## 7 HURRICANE/TYPHOON 2607872800
## 8 FLASH FLOOD 1421317100
## 9 EXTREME COLD 1292973000
## 10 FROST/FREEZE 1094086000
# Bar chart of crop damage for the top 10 event types
cropplotdamage <- ggplot(cropdamage, aes(x = EVTYPE, y = TOTAL_CROPDMG)) +
  geom_col(fill = "blue") +
  labs(x = "Event Type", y = "Crop Damages ($)") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Show the total damage table
totaldamage
## EVTYPE TOTALDMG
## 1 FLOOD 150319678257
## 2 HURRICANE/TYPHOON 71913712800
## 3 TORNADO 57362333947
## 4 STORM SURGE 43323541000
## 5 HAIL 18761221986
## 6 FLASH FLOOD 18243991079
## 7 DROUGHT 15018672000
## 8 HURRICANE 14610229010
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041360
* Tornadoes represent the most significant harm to public health.
# Draw the fatalities and injuries charts side by side under a shared title.
# The title face is given by name: the previous numeric face (6) is not one
# of the documented font faces, so its rendering depended on the graphics
# device. Italic matches the title of the damage figure.
grid.arrange(
  fatalitiesbyweather, injuriesbyweather,
  ncol = 2, nrow = 1,
  top = textGrob(
    "Public Health Impact - Fatalities & Injuries from top 10 Weather Events",
    gp = gpar(fontsize = 14, fontface = "italic")
  )
)
* Floods cause the most significant total damage (property and crop).
* Drought events inflict the most damage to crops.
* Floods cause the most harm to property.
# Bar chart of combined property and crop damage for the top 10 event types
totplotdamage <- ggplot(totaldamage, aes(x = EVTYPE, y = TOTALDMG)) +
  geom_col(fill = "yellow") +
  labs(x = "Event Type", y = "Total Prop & Crop Damages ($)") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
#plot(totplotdamage)
# Draw the property, crop and total damage charts in one row under a
# shared title
grid.arrange(
  propplotdamage, cropplotdamage, totplotdamage,
  ncol = 3, nrow = 1,
  top = textGrob(
    "Damage Impact - Property, Crop, & Overall from top 10 Weather Events ",
    gp = gpar(fontsize = 14, font = 3)
  )
)